You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/12/05 23:42:40 UTC

[2/2] incubator-impala git commit: IMPALA-4498: crash in to_utc_timestamp/from_utc_timestamp

IMPALA-4498: crash in to_utc_timestamp/from_utc_timestamp

The bugs was that the functions did not check whether the conversion
pushed the value out of range. The fix is to use boost's validation
immediately to check the validity of the timestamp and catch any
exceptions thrown.

It would be preferable to avoid the exceptions, but Boost does not
provide a straightforward way to disable the exceptions or extract
potentially-invalid values from a date object.

Testing:
Added expression tests that exercise out-of-range cases. Also
added additional tests to confirm that date addition and subtraction
weren't affected by similar bugs.

Change-Id: Idc427b06ac33ec874a05cb98d01c00e970d3dde6
Reviewed-on: http://gerrit.cloudera.org:8080/5251
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1495b200
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1495b200
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1495b200

Branch: refs/heads/master
Commit: 1495b2007d9645053dae71ca4dbf4e3a48558ef4
Parents: 730ac1b
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Nov 28 15:29:37 2016 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Mon Dec 5 23:33:44 2016 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                       | 48 +++++++++++++-
 be/src/exprs/timestamp-functions.cc             | 70 +++++++++++++++-----
 .../queries/QueryTest/exprs.test                | 48 ++++++++++++++
 3 files changed, 147 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1495b200/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 1340f55..f439aa9 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3580,6 +3580,36 @@ TEST_F(ExprTest, TimestampFunctions) {
       "CAST('9995-12-11 00:00:00' AS TIMESTAMP) + INTERVAL 61 MONTH", TYPE_TIMESTAMP);
   TestIsNull(
       "CAST('9995-12-11 00:00:00' AS TIMESTAMP) - INTERVAL -61 MONTH", TYPE_TIMESTAMP);
+  TestIsNull(
+      "CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 1000 DAYS", TYPE_TIMESTAMP);
+  TestIsNull(
+      "CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 1 DAYS", TYPE_TIMESTAMP);
+  TestIsNull(
+      "CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 10000 HOURS", TYPE_TIMESTAMP);
+  TestIsNull(
+      "CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 24 HOURS", TYPE_TIMESTAMP);
+  TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 1000000 MINUTES",
+      TYPE_TIMESTAMP);
+  TestIsNull(
+      "CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 13 MINUTES", TYPE_TIMESTAMP);
+  TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000 SECONDS",
+      TYPE_TIMESTAMP);
+  TestIsNull(
+      "CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 SECONDS", TYPE_TIMESTAMP);
+  TestIsNull(
+      "CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000 MILLISECONDS",
+      TYPE_TIMESTAMP);
+  TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 MILLISECONDS",
+      TYPE_TIMESTAMP);
+  TestIsNull(
+      "CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000000 MICROSECONDS",
+      TYPE_TIMESTAMP);
+  TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 MICROSECONDS",
+      TYPE_TIMESTAMP);
+  TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000000000 "
+      "NANOSECONDS", TYPE_TIMESTAMP);
+  TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 NANOSECONDS",
+      TYPE_TIMESTAMP);
   // Add/sub months.
   TestStringValue("cast(date_add(cast('2012-01-01 09:10:11.123456789' "
       "as timestamp), interval 13 months) as string)",
@@ -4067,11 +4097,23 @@ TEST_F(ExprTest, TimestampFunctions) {
   }
 
   // Hive silently ignores bad timezones.  We log a problem.
-  TestStringValue(
-      "cast(from_utc_timestamp("
-      "cast('1970-01-01 00:00:00' as timestamp), 'FOOBAR') as string)",
+  TestStringValue("cast(from_utc_timestamp("
+                  "cast('1970-01-01 00:00:00' as timestamp), 'FOOBAR') as string)",
       "1970-01-01 00:00:00");
 
+  // These return NULL because timezone conversion makes the value out
+  // of range.
+  TestIsNull("to_utc_timestamp(CAST(\"1400-01-01 05:00:00\" as TIMESTAMP), \"AEST\")",
+      TYPE_TIMESTAMP);
+  TestIsNull("from_utc_timestamp(CAST(\"1400-01-01 05:00:00\" as TIMESTAMP), \"PST\")",
+      TYPE_TIMESTAMP);
+  // TODO: IMPALA-4549: these should return NULL, but validation doesn't catch year 10000.
+  // Just check that we don't crash.
+  GetValue("from_utc_timestamp(CAST(\"10000-12-31 21:00:00\" as TIMESTAMP), \"JST\")",
+      TYPE_TIMESTAMP);
+  GetValue("to_utc_timestamp(CAST(\"10000-12-31 21:00:00\" as TIMESTAMP), \"PST\")",
+      TYPE_TIMESTAMP);
+
   // With support of date strings this generates a date and 0 time.
   TestStringValue(
       "cast(cast('1999-01-10' as timestamp) as string)", "1999-01-10 00:00:00");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1495b200/be/src/exprs/timestamp-functions.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc
index de01104..1afe349 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -23,11 +23,12 @@
 
 #include "exprs/anyval-util.h"
 #include "exprs/timezone_db.h"
+#include "gutil/strings/substitute.h"
 #include "runtime/string-value.inline.h"
 #include "runtime/timestamp-parse-util.h"
 #include "runtime/timestamp-value.h"
-#include "udf/udf.h"
 #include "udf/udf-internal.h"
+#include "udf/udf.h"
 
 #include "common/names.h"
 
@@ -47,6 +48,24 @@ string TimestampFunctions::ToIsoExtendedString(const TimestampValue& ts_value) {
   return to_iso_extended_string(ts_value.date());
 }
 
+namespace {
+/// Uses Boost's internal checking to throw an exception if 'date' is out of the
+/// supported range of boost::gregorian.
+void ThrowIfDateOutOfRange(const boost::gregorian::date& date) {
+  // Boost checks the ranges when instantiating the year/month/day representations.
+  boost::gregorian::greg_year year = date.year();
+  boost::gregorian::greg_month month = date.month();
+  boost::gregorian::greg_day day = date.day();
+  // Ensure Boost's validation is effective.
+  DCHECK_GE(year, boost::gregorian::greg_year::min());
+  DCHECK_LE(year, boost::gregorian::greg_year::max());
+  DCHECK_GE(month, boost::gregorian::greg_month::min());
+  DCHECK_LE(month, boost::gregorian::greg_month::max());
+  DCHECK_GE(day, boost::gregorian::greg_day::min());
+  DCHECK_LE(day, boost::gregorian::greg_day::max());
+}
+}
+
 // This function uses inline asm functions, which we believe to be from the boost library.
 // Inline asm is not currently supported by JIT, so this function should always be run in
 // the interpreted mode. This is handled in ScalarFnCall::GetUdf().
@@ -67,13 +86,22 @@ TimestampVal TimestampFunctions::FromUtc(FunctionContext* context,
     return ts_val;
   }
 
-  ptime temp;
-  ts_value.ToPtime(&temp);
-  local_date_time lt(temp, timezone);
-  TimestampValue return_value = lt.local_time();
-  TimestampVal return_val;
-  return_value.ToTimestampVal(&return_val);
-  return return_val;
+  try {
+    ptime temp;
+    ts_value.ToPtime(&temp);
+    local_date_time lt(temp, timezone);
+    ptime local_time = lt.local_time();
+    ThrowIfDateOutOfRange(local_time.date());
+    TimestampVal return_val;
+    TimestampValue(local_time).ToTimestampVal(&return_val);
+    return return_val;
+  } catch (boost::exception&) {
+    const string& msg = Substitute(
+        "Timestamp '$0' did not convert to a valid local time in timezone '$1'",
+        ts_value.DebugString(), tz_string_value.DebugString());
+    context->AddWarning(msg.c_str());
+    return TimestampVal::null();
+  }
 }
 
 // This function uses inline asm functions, which we believe to be from the boost library.
@@ -96,16 +124,26 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext* context,
     return ts_val;
   }
 
-  local_date_time lt(ts_value.date(), ts_value.time(),
-      timezone, local_date_time::NOT_DATE_TIME_ON_ERROR);
-  TimestampValue return_value(lt.utc_time());
-  TimestampVal return_val;
-  return_value.ToTimestampVal(&return_val);
-  return return_val;
+  try {
+    local_date_time lt(ts_value.date(), ts_value.time(), timezone,
+        local_date_time::NOT_DATE_TIME_ON_ERROR);
+    ptime utc_time = lt.utc_time();
+    // The utc_time() conversion does not check ranges - need to explicitly check.
+    ThrowIfDateOutOfRange(utc_time.date());
+    TimestampVal return_val;
+    TimestampValue(utc_time).ToTimestampVal(&return_val);
+    return return_val;
+  } catch (boost::exception&) {
+    const string& msg =
+        Substitute("Timestamp '$0' in timezone '$1' could not be converted to UTC",
+            ts_value.DebugString(), tz_string_value.DebugString());
+    context->AddWarning(msg.c_str());
+    return TimestampVal::null();
+  }
 }
 
-void TimestampFunctions::UnixAndFromUnixPrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
+void TimestampFunctions::UnixAndFromUnixPrepare(
+    FunctionContext* context, FunctionContext::FunctionStateScope scope) {
   if (scope != FunctionContext::THREAD_LOCAL) return;
   DateTimeFormatContext* dt_ctx = NULL;
   if (context->IsArgConstant(1)) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1495b200/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 88f6599..f5e4bab 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -2574,3 +2574,51 @@ where  (cast(a.string_col as string) > 'a');
 ---- TYPES
 INT
 ====
+---- QUERY
+# Test from_utc_timestamp() returning out-of-range result.
+select from_utc_timestamp(CAST("1400-01-01 05:00:00" as TIMESTAMP), "PST")
+from alltypes
+limit 1
+---- RESULTS
+NULL
+---- TYPES
+TIMESTAMP
+---- ERRORS
+UDF WARNING: Timestamp '1400-01-01 05:00:00' did not convert to a valid local time in timezone 'PST'
+====
+---- QUERY
+# Test from_utc_timestamp() returning out-of-range result.
+select to_utc_timestamp(CAST("1400-01-01 05:00:00" as TIMESTAMP), "JST")
+from alltypes
+limit 1
+---- RESULTS
+NULL
+---- TYPES
+TIMESTAMP
+---- ERRORS
+UDF WARNING: Timestamp '1400-01-01 05:00:00' in timezone 'JST' could not be converted to UTC
+====
+---- QUERY
+# Test out-of-range value handling when adding dates.
+select CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 367 DAYS
+from alltypes
+limit 1
+---- RESULTS
+NULL
+---- TYPES
+TIMESTAMP
+---- ERRORS
+UDF WARNING: Cannot add interval 367: Year is out of valid range: 1400..10000
+====
+---- QUERY
+# Test out-of-range value handling when subtracting dates.
+select CAST('1400-01-01 21:00:00' AS TIMESTAMP) - INTERVAL 1 DAYS
+from alltypes
+limit 1
+---- RESULTS
+NULL
+---- TYPES
+TIMESTAMP
+---- ERRORS
+UDF WARNING: Cannot subtract interval 1: Year is out of valid range: 1400..10000
+====