You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2018/04/10 04:48:47 UTC

[5/6] impala git commit: IMPALA-5607: part 1: breaking extract/date_part changes

IMPALA-5607: part 1: breaking extract/date_part changes

This is the compatibility-breaking part of Jinchul Kim's change
to add additional units to extract() and date_part(). To support
nanoseconds we need to widen the output type of these functions
from INT to BIGINT. We also change the meaning of "milliseconds"
to include the seconds component (e.g. 18:27:28.123 now yields
28123 instead of 123).

Cherry-picks: not for 2.x

Change-Id: I42d83712d9bb3a4900bec38a9c009dcf2a1fe019
Reviewed-on: http://gerrit.cloudera.org:8080/9957
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2995be82
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2995be82
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2995be82

Branch: refs/heads/master
Commit: 2995be8238898660cf68cf125c4a52dea727f898
Parents: f76b1bf
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Apr 9 11:30:10 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue Apr 10 04:00:37 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                       | 124 +++++++++----------
 be/src/exprs/udf-builtins-ir.cc                 |  42 ++++---
 be/src/exprs/udf-builtins.h                     |   4 +-
 common/function-registry/impala_functions.py    |   4 +-
 .../queries/QueryTest/aggregation.test          |   2 +-
 .../queries/QueryTest/exprs.test                |   4 +-
 6 files changed, 94 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/2995be82/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index a381af9..0f3f3d4 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -6731,73 +6731,73 @@ TEST_F(ExprTest, TimestampFunctions) {
       "cast(trunc(cast('2012-09-10 07:59:59' as timestamp), 'XXYYZZ') as string)");
 
   // Extract as a regular function
-  TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'YEAR')",
-            TYPE_INT, 2006);
-  TestValue("extract('2006-05-12 18:27:28.12345', 'YEAR')", TYPE_INT, 2006);
-  TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'MoNTH')",
-            TYPE_INT, 5);
-  TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'DaY')",
-            TYPE_INT, 12);
-  TestValue("extract(cast('2006-05-12 06:27:28.12345' as timestamp), 'hour')",
-            TYPE_INT, 6);
-  TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'MINUTE')",
-            TYPE_INT, 27);
-  TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'SECOND')",
-            TYPE_INT, 28);
-  TestValue("extract(cast('2006-05-12 18:27:28.12345' as timestamp), 'MILLISECOND')",
-            TYPE_INT, 123);
-  TestValue("extract(cast('2006-05-13 01:27:28.12345' as timestamp), 'EPOCH')",
-            TYPE_INT, 1147483648);
-  TestNonOkStatus("extract(cast('2006-05-13 01:27:28.12345' as timestamp), 'foo')");
-  TestNonOkStatus("extract(cast('2006-05-13 01:27:28.12345' as timestamp), NULL)");
-  TestIsNull("extract(NULL, 'EPOCH')", TYPE_INT);
+  TestValue("extract(cast('2006-05-12 18:27:28.123456789' as timestamp), 'YEAR')",
+            TYPE_BIGINT, 2006);
+  TestValue("extract('2006-05-12 18:27:28.123456789', 'YEAR')", TYPE_BIGINT, 2006);
+  TestValue("extract(cast('2006-05-12 18:27:28.123456789' as timestamp), 'MoNTH')",
+            TYPE_BIGINT, 5);
+  TestValue("extract(cast('2006-05-12 18:27:28.123456789' as timestamp), 'DaY')",
+            TYPE_BIGINT, 12);
+  TestValue("extract(cast('2006-05-12 06:27:28.123456789' as timestamp), 'hour')",
+            TYPE_BIGINT, 6);
+  TestValue("extract(cast('2006-05-12 18:27:28.123456789' as timestamp), 'MINUTE')",
+            TYPE_BIGINT, 27);
+  TestValue("extract(cast('2006-05-12 18:27:28.123456789' as timestamp), 'SECOND')",
+            TYPE_BIGINT, 28);
+  TestValue("extract(cast('2006-05-12 18:27:28.123456789' as timestamp), 'MILLISECOND')",
+            TYPE_BIGINT, 28123);
+  TestValue("extract(cast('2006-05-13 01:27:28.123456789' as timestamp), 'EPOCH')",
+            TYPE_BIGINT, 1147483648);
+  TestNonOkStatus("extract(cast('2006-05-13 01:27:28.123456789' as timestamp), 'foo')");
+  TestNonOkStatus("extract(cast('2006-05-13 01:27:28.123456789' as timestamp), NULL)");
+  TestIsNull("extract(NULL, 'EPOCH')", TYPE_BIGINT);
   TestNonOkStatus("extract(NULL, NULL)");
 
   // Extract using FROM keyword
-  TestValue("extract(YEAR from cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 2006);
-  TestValue("extract(QUARTER from cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 2);
-  TestValue("extract(MoNTH from cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 5);
-  TestValue("extract(DaY from cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 12);
-  TestValue("extract(hour from cast('2006-05-12 06:27:28.12345' as timestamp))",
-            TYPE_INT, 6);
-  TestValue("extract(MINUTE from cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 27);
-  TestValue("extract(SECOND from cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 28);
-  TestValue("extract(MILLISECOND from cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 123);
-  TestValue("extract(EPOCH from cast('2006-05-13 01:27:28.12345' as timestamp))",
-            TYPE_INT, 1147483648);
-  TestNonOkStatus("extract(foo from cast('2006-05-13 01:27:28.12345' as timestamp))");
-  TestNonOkStatus("extract(NULL from cast('2006-05-13 01:27:28.12345' as timestamp))");
-  TestIsNull("extract(EPOCH from NULL)", TYPE_INT);
+  TestValue("extract(YEAR from cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 2006);
+  TestValue("extract(QUARTER from cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 2);
+  TestValue("extract(MoNTH from cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 5);
+  TestValue("extract(DaY from cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 12);
+  TestValue("extract(hour from cast('2006-05-12 06:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 6);
+  TestValue("extract(MINUTE from cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 27);
+  TestValue("extract(SECOND from cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 28);
+  TestValue("extract(MILLISECOND from cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 28123);
+  TestValue("extract(EPOCH from cast('2006-05-13 01:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 1147483648);
+  TestNonOkStatus("extract(foo from cast('2006-05-13 01:27:28.123456789' as timestamp))");
+  TestNonOkStatus("extract(NULL from cast('2006-05-13 01:27:28.123456789' as timestamp))");
+  TestIsNull("extract(EPOCH from NULL)", TYPE_BIGINT);
 
   // Date_part, same as extract function but with arguments swapped
-  TestValue("date_part('YEAR', cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 2006);
-  TestValue("date_part('QUARTER', cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 2);
-  TestValue("date_part('MoNTH', cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 5);
-  TestValue("date_part('DaY', cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 12);
-  TestValue("date_part('hour', cast('2006-05-12 06:27:28.12345' as timestamp))",
-            TYPE_INT, 6);
-  TestValue("date_part('MINUTE', cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 27);
-  TestValue("date_part('SECOND', cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 28);
-  TestValue("date_part('MILLISECOND', cast('2006-05-12 18:27:28.12345' as timestamp))",
-            TYPE_INT, 123);
-  TestValue("date_part('EPOCH', cast('2006-05-13 01:27:28.12345' as timestamp))",
-            TYPE_INT, 1147483648);
-  TestNonOkStatus("date_part('foo', cast('2006-05-13 01:27:28.12345' as timestamp))");
-  TestNonOkStatus("date_part(NULL, cast('2006-05-13 01:27:28.12345' as timestamp))");
-  TestIsNull("date_part('EPOCH', NULL)", TYPE_INT);
+  TestValue("date_part('YEAR', cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 2006);
+  TestValue("date_part('QUARTER', cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 2);
+  TestValue("date_part('MoNTH', cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 5);
+  TestValue("date_part('DaY', cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 12);
+  TestValue("date_part('hour', cast('2006-05-12 06:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 6);
+  TestValue("date_part('MINUTE', cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 27);
+  TestValue("date_part('SECOND', cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 28);
+  TestValue("date_part('MILLISECOND', cast('2006-05-12 18:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 28123);
+  TestValue("date_part('EPOCH', cast('2006-05-13 01:27:28.123456789' as timestamp))",
+            TYPE_BIGINT, 1147483648);
+  TestNonOkStatus("date_part('foo', cast('2006-05-13 01:27:28.123456789' as timestamp))");
+  TestNonOkStatus("date_part(NULL, cast('2006-05-13 01:27:28.123456789' as timestamp))");
+  TestIsNull("date_part('EPOCH', NULL)", TYPE_BIGINT);
   TestNonOkStatus("date_part(NULL, NULL)");
 
   // Test with timezone offset

http://git-wip-us.apache.org/repos/asf/impala/blob/2995be82/be/src/exprs/udf-builtins-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/udf-builtins-ir.cc b/be/src/exprs/udf-builtins-ir.cc
index 3c47e6b..c870fae 100644
--- a/be/src/exprs/udf-builtins-ir.cc
+++ b/be/src/exprs/udf-builtins-ir.cc
@@ -25,6 +25,7 @@
 #include <string>
 
 #include "gen-cpp/Exprs_types.h"
+#include "gutil/walltime.h"
 #include "runtime/runtime-state.h"
 #include "runtime/timestamp-value.h"
 #include "udf/udf-internal.h"
@@ -113,6 +114,13 @@ TimestampVal UdfBuiltins::DateTrunc(
   return DateTruncImpl(context, tv, unit_str);
 }
 
+static int64_t ExtractMillisecond(const time_duration& time) {
+  // Returns the seconds component plus the fractional part, in milliseconds. Fractional
+  // seconds are nanoseconds because Boost is configured to use nanosecond precision.
+  return time.fractional_seconds() / (NANOS_PER_MICRO * MICROS_PER_MILLI)
+       + time.seconds() * MILLIS_PER_SEC;
+}
+
 // Maps the user facing name of a unit to a TExtractField
 // Returns the TExtractField for the given unit
 TExtractField::type StrToExtractField(FunctionContext* ctx, const StringVal& unit_str) {
@@ -130,11 +138,11 @@ TExtractField::type StrToExtractField(FunctionContext* ctx, const StringVal& uni
   return TExtractField::INVALID_FIELD;
 }
 
-IntVal UdfBuiltins::Extract(FunctionContext* context, const StringVal& unit_str,
+BigIntVal UdfBuiltins::Extract(FunctionContext* context, const StringVal& unit_str,
     const TimestampVal& tv) {
   // resolve extract_field using the prepared state if possible, o.w. parse now
   // ExtractPrepare() can only parse extract_field if user passes it as a string literal
-  if (tv.is_null) return IntVal::null();
+  if (tv.is_null) return BigIntVal::null();
 
   TExtractField::type field;
   void* state = context->GetFunctionState(FunctionContext::THREAD_LOCAL);
@@ -145,7 +153,7 @@ IntVal UdfBuiltins::Extract(FunctionContext* context, const StringVal& unit_str,
     if (field == TExtractField::INVALID_FIELD) {
       string string_unit(reinterpret_cast<char*>(unit_str.ptr), unit_str.len);
       context->SetError(Substitute("invalid extract field: $0", string_unit).c_str());
-      return IntVal::null();
+      return BigIntVal::null();
     }
   }
 
@@ -157,16 +165,16 @@ IntVal UdfBuiltins::Extract(FunctionContext* context, const StringVal& unit_str,
     case TExtractField::QUARTER:
     case TExtractField::MONTH:
     case TExtractField::DAY:
-      if (orig_date.is_special()) return IntVal::null();
+      if (orig_date.is_special()) return BigIntVal::null();
       break;
     case TExtractField::HOUR:
     case TExtractField::MINUTE:
     case TExtractField::SECOND:
     case TExtractField::MILLISECOND:
-      if (time.is_special()) return IntVal::null();
+      if (time.is_special()) return BigIntVal::null();
       break;
     case TExtractField::EPOCH:
-      if (time.is_special() || orig_date.is_special()) return IntVal::null();
+      if (time.is_special() || orig_date.is_special()) return BigIntVal::null();
       break;
     case TExtractField::INVALID_FIELD:
       DCHECK(false);
@@ -174,44 +182,44 @@ IntVal UdfBuiltins::Extract(FunctionContext* context, const StringVal& unit_str,
 
   switch (field) {
     case TExtractField::YEAR: {
-      return IntVal(orig_date.year());
+      return BigIntVal(orig_date.year());
     }
     case TExtractField::QUARTER: {
       int m = orig_date.month();
-      return IntVal((m - 1) / 3 + 1);
+      return BigIntVal((m - 1) / 3 + 1);
     }
     case TExtractField::MONTH: {
-      return IntVal(orig_date.month());
+      return BigIntVal(orig_date.month());
     }
     case TExtractField::DAY: {
-      return IntVal(orig_date.day());
+      return BigIntVal(orig_date.day());
     }
     case TExtractField::HOUR: {
-      return IntVal(time.hours());
+      return BigIntVal(time.hours());
     }
     case TExtractField::MINUTE: {
-      return IntVal(time.minutes());
+      return BigIntVal(time.minutes());
     }
     case TExtractField::SECOND: {
-      return IntVal(time.seconds());
+      return BigIntVal(time.seconds());
     }
     case TExtractField::MILLISECOND: {
-      return IntVal(time.total_milliseconds() - time.total_seconds() * 1000);
+      return BigIntVal(ExtractMillisecond(time));
     }
     case TExtractField::EPOCH: {
       ptime epoch_date(date(1970, 1, 1), time_duration(0, 0, 0));
       ptime cur_date(orig_date, time);
       time_duration diff = cur_date - epoch_date;
-      return IntVal(diff.total_seconds());
+      return BigIntVal(diff.total_seconds());
     }
     default: {
       DCHECK(false) << field;
-      return IntVal::null();
+      return BigIntVal::null();
     }
   }
 }
 
-IntVal UdfBuiltins::Extract(FunctionContext* context, const TimestampVal& tv,
+BigIntVal UdfBuiltins::Extract(FunctionContext* context, const TimestampVal& tv,
     const StringVal& unit_str) {
   return Extract(context, unit_str, tv);
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/2995be82/be/src/exprs/udf-builtins.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/udf-builtins.h b/be/src/exprs/udf-builtins.h
index 4eeb33a..8aec873 100644
--- a/be/src/exprs/udf-builtins.h
+++ b/be/src/exprs/udf-builtins.h
@@ -117,12 +117,12 @@ class UdfBuiltins {
   ///    http://docs.oracle.com/cd/B19306_01/server.102/b14200/functions050.htm
   ///
   /// This is used by the DATE_PART function.
-  static IntVal Extract(FunctionContext* context, const StringVal& field_str,
+  static BigIntVal Extract(FunctionContext* context, const StringVal& field_str,
       const TimestampVal& date);
 
   /// This is for the EXTRACT(Timestamp, String) and EXTRACT(Timeunit FROM
   /// Timestamp) functions.
-  static IntVal Extract(FunctionContext* context, const TimestampVal& date,
+  static BigIntVal Extract(FunctionContext* context, const TimestampVal& date,
       const StringVal& field_str);
   /// This is used by the DATE_PART function.
   static void ExtractPrepare(FunctionContext* context,

http://git-wip-us.apache.org/repos/asf/impala/blob/2995be82/common/function-registry/impala_functions.py
----------------------------------------------------------------------
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index 6c405c9..f86098a 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -88,11 +88,11 @@ visible_functions = [
   # TIMESTAMPs meaning EXTRACT(STRING, STRING) is valid. If EXTRACT(STRING, TIMESTAMP)
   # is added, it takes precedence over the existing EXTRACT(TIMESTAMP, STRING)
   # which could break users.
-  [['extract'], 'INT', ['TIMESTAMP', 'STRING'],
+  [['extract'], 'BIGINT', ['TIMESTAMP', 'STRING'],
    '_ZN6impala11UdfBuiltins7ExtractEPN10impala_udf15FunctionContextERKNS1_12TimestampValERKNS1_9StringValE',
    '_ZN6impala11UdfBuiltins21SwappedExtractPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
    '_ZN6impala11UdfBuiltins12ExtractCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
-  [['date_part'], 'INT', ['STRING', 'TIMESTAMP'],
+  [['date_part'], 'BIGINT', ['STRING', 'TIMESTAMP'],
    '_ZN6impala11UdfBuiltins7ExtractEPN10impala_udf15FunctionContextERKNS1_9StringValERKNS1_12TimestampValE',
    '_ZN6impala11UdfBuiltins14ExtractPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
    '_ZN6impala11UdfBuiltins12ExtractCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],

http://git-wip-us.apache.org/repos/asf/impala/blob/2995be82/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
index 7bb510b..88dd97c 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
@@ -1231,7 +1231,7 @@ order by 1, 2;
 2010,11,1350
 2010,12,1395
 ---- TYPES
-INT,INT,BIGINT
+BIGINT,BIGINT,BIGINT
 ====
 ---- QUERY
 # IMPALA-2089: Tests correct elimination of redundant predicates.

http://git-wip-us.apache.org/repos/asf/impala/blob/2995be82/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 1ac96f0..7677aa6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -33,7 +33,7 @@ cross join
 'millisecond',10
 'epoch',1361205960
 ---- TYPES
-string, int
+string, bigint
 ====
 ---- QUERY
 # EXTRACT fields from timestamp
@@ -46,7 +46,7 @@ from alltypesagg order by id limit 5
 2010,100
 2010,300
 ---- TYPES
-int, int
+bigint, bigint
 ====
 ---- QUERY
 # IS NULL predicate