You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/07/15 18:00:50 UTC

[impala] 01/03: IMPALA-9941: ExprTest.CastExprs fails when running with ASAN fix

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6d1a707cf50c2297972a69a7c0469afe4a78d739
Author: Adam Tamas <ta...@cloudera.com>
AuthorDate: Fri Jul 10 17:19:14 2020 +0200

    IMPALA-9941: ExprTest.CastExprs fails when running with ASAN fix
    
    Fixed the issue with the ASAN build where too short tokens in
    timestamps fails because of indexing out of bounds because of a
    missing check.
    Updated some missed files connected to the drop of dateless timestamps
    (IMPALA-9531) regarding tests, comments.
    
    Testing:
     - Ran CORE tests
     - Ran ASAN with EE_TEST_SHARDS=6 (with the help of IMPALA-9887)
    
    Change-Id: I75bd47b6627a2e338c46dc354f7e67d34c1abbcf
    Reviewed-on: http://gerrit.cloudera.org:8080/16179
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/expr-test.cc                          | 23 +++---
 be/src/runtime/date-parse-util.cc                  |  2 +-
 .../runtime/datetime-simple-date-format-parser.cc  | 89 +++++++++++-----------
 .../runtime/datetime-simple-date-format-parser.h   |  4 +-
 be/src/runtime/timestamp-parse-util.cc             |  4 +-
 be/src/runtime/timestamp-parse-util.h              |  9 +--
 6 files changed, 63 insertions(+), 68 deletions(-)

diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 4217ec8..b84382a 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3238,16 +3238,15 @@ TEST_P(ExprTest, CastExprs) {
       TimestampValue::ParseSimpleDateFormat("2001-01-21 01:05:31.123450000"));
   TestTimestampValue("cast('2001-1-21 1:5:31.12345678910111213' as timestamp)",
       TimestampValue::ParseSimpleDateFormat("2001-01-21 01:05:31.123456789"));
-  TestTimestampValue("cast('1:05:1.12' as timestamp)",
-      TimestampValue::ParseSimpleDateFormat("01:05:01.120000000"));
-  TestTimestampValue("cast('1:05:1' as timestamp)",
-      TimestampValue::ParseSimpleDateFormat("01:05:01"));
   TestTimestampValue("cast('        2001-01-9 1:05:1        ' as timestamp)",
       TimestampValue::ParseSimpleDateFormat("2001-01-09 01:05:01"));
   TestIsNull("cast('2001-6' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('01-1-21' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-1-21 12:5:3 AM' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('1:05:31.123456foo' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1:05:1.12' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1:05:1' as timestamp)", TYPE_TIMESTAMP);
+
   TestIsNull("cast('10/feb/10' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('1909-foo1-2bar' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('1909/1-/2' as timestamp)", TYPE_TIMESTAMP);
@@ -3309,21 +3308,21 @@ TEST_P(ExprTest, CastExprs) {
       TimestampValue::ParseSimpleDateFormat("2001-01-09 01:05:01"));
   TestTimestampValue("cast('  \t\r\n      2001-01-09   \t\r\n     ' as timestamp)",
       TimestampValue::ParseSimpleDateFormat("2001-01-09"));
-  TestTimestampValue("cast('  \t\r\n      01:05:01   \t\r\n     ' as timestamp)",
-      TimestampValue::ParseSimpleDateFormat("01:05:01"));
-  TestTimestampValue("cast(' \t\r\n 01:05:01.123456789   \t\r\n     ' as timestamp)",
-      TimestampValue::ParseSimpleDateFormat("01:05:01.123456789"));
   TestTimestampValue("cast('  \t\r\n      2001-1-9 1:5:1    \t\r\n    ' as timestamp)",
       TimestampValue::ParseSimpleDateFormat("2001-01-09 01:05:01"));
   TestTimestampValue("cast('  \t\r\n  2001-1-9 1:5:1.12345678  \t\r\n ' as timestamp)",
       TimestampValue::ParseSimpleDateFormat("2001-01-09 01:05:01.123456780"));
-  TestTimestampValue("cast('  \t\r\n      1:5:1    \t\r\n    ' as timestamp)",
-      TimestampValue::ParseSimpleDateFormat("01:05:01"));
-  TestTimestampValue("cast('  \t\r\n      1:5:1.12345678    \t\r\n    ' as timestamp)",
-      TimestampValue::ParseSimpleDateFormat("01:05:01.123456780"));
   TestTimestampValue("cast('  \t\r\n      2001-1-9    \t\r\n    ' as timestamp)",
       TimestampValue::ParseSimpleDateFormat("2001-01-09"));
 
+  // IMPALA-9531: Dateless timestamps without format should return NULL
+  TestIsNull(
+      "cast('  \t\r\n      1:5:1.12345678    \t\r\n    ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('  \t\r\n      1:5:1    \t\r\n    ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull(
+      "cast(' \t\r\n 01:05:01.123456789   \t\r\n     ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('  \t\r\n      01:05:01   \t\r\n     ' as timestamp)", TYPE_TIMESTAMP);
+
   // IMPALA-6995: whitespace-only strings should return NULL.
   TestIsNull("cast(' ' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('\n' as timestamp)", TYPE_TIMESTAMP);
diff --git a/be/src/runtime/date-parse-util.cc b/be/src/runtime/date-parse-util.cc
index 9e94f28..8a37f62 100644
--- a/be/src/runtime/date-parse-util.cc
+++ b/be/src/runtime/date-parse-util.cc
@@ -102,7 +102,7 @@ bool DateParser::ParseSimpleDateFormat(const char* str, int len, bool accept_tim
 
   const DateTimeFormatContext* dt_ctx =
       SimpleDateFormatTokenizer::GetDefaultFormatContext(str, trimmed_len,
-          accept_time_toks, false);
+          accept_time_toks);
   if (dt_ctx != nullptr) return ParseSimpleDateFormat(str, trimmed_len, *dt_ctx, date);
 
   // Generating context lazily as a fall back if default formats fail.
diff --git a/be/src/runtime/datetime-simple-date-format-parser.cc b/be/src/runtime/datetime-simple-date-format-parser.cc
index 33ca531..3d6d79c 100644
--- a/be/src/runtime/datetime-simple-date-format-parser.cc
+++ b/be/src/runtime/datetime-simple-date-format-parser.cc
@@ -336,65 +336,66 @@ bool SimpleDateFormatTokenizer::TokenizeByStr( DateTimeFormatContext* dt_ctx,
 }
 
 const DateTimeFormatContext* SimpleDateFormatTokenizer::GetDefaultFormatContext(
-    const char* str, int len, bool accept_time_toks, bool accept_time_toks_only) {
+    const char* str, int len, bool accept_time_toks) {
   DCHECK(initialized);
   DCHECK(str != nullptr);
   DCHECK(len > 0);
 
-  // Check if this string starts with a date component
-  if (str[4] == '-' && str[7] == '-') {
-    // Do we have a date component only?
-    if (len == DEFAULT_DATE_FMT_LEN) {
-      return &DEFAULT_DATE_CTX;
-    }
+  if (LIKELY(len >= DEFAULT_DATE_FMT_LEN)) {
+    // Check if this string starts with a date component
+    if (str[4] == '-' && str[7] == '-') {
+      // Do we have a date component only?
+      if (len == DEFAULT_DATE_FMT_LEN) {
+        return &DEFAULT_DATE_CTX;
+      }
 
-    // We have a time component as well. Do we accept it?
-    if (!accept_time_toks) return nullptr;
-
-    switch (len) {
-      case DEFAULT_SHORT_DATE_TIME_FMT_LEN: {
-        if (LIKELY(str[13] == ':')) {
-          switch (str[10]) {
-            case ' ':
-              return &DEFAULT_SHORT_DATE_TIME_CTX;
-            case 'T':
-              return &DEFAULT_SHORT_ISO_DATE_TIME_CTX;
+      // We have a time component as well. Do we accept it?
+      if (!accept_time_toks) return nullptr;
+
+      switch (len) {
+        case DEFAULT_SHORT_DATE_TIME_FMT_LEN: {
+          if (LIKELY(str[13] == ':')) {
+            switch (str[10]) {
+              case ' ':
+                return &DEFAULT_SHORT_DATE_TIME_CTX;
+              case 'T':
+                return &DEFAULT_SHORT_ISO_DATE_TIME_CTX;
+            }
           }
+          break;
         }
-        break;
-      }
-      case DEFAULT_DATE_TIME_FMT_LEN: {
-        if (LIKELY(str[13] == ':')) {
-          switch (str[10]) {
-            case ' ':
-              return &DEFAULT_DATE_TIME_CTX[9];
-            case 'T':
-              return &DEFAULT_ISO_DATE_TIME_CTX[9];
+        case DEFAULT_DATE_TIME_FMT_LEN: {
+          if (LIKELY(str[13] == ':')) {
+            switch (str[10]) {
+              case ' ':
+                return &DEFAULT_DATE_TIME_CTX[9];
+              case 'T':
+                return &DEFAULT_ISO_DATE_TIME_CTX[9];
+            }
           }
+          break;
         }
-        break;
-      }
-      default: {
-        // There is likely a fractional component that's below the expected 9 chars.
-        // We will need to work out which default context to use that corresponds to
-        // the fractional length in the string.
-        if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN)
-            && LIKELY(str[19] == '.') && LIKELY(str[13] == ':')) {
-          switch (str[10]) {
-            case ' ': {
-              return &DEFAULT_DATE_TIME_CTX[len - DEFAULT_SHORT_DATE_TIME_FMT_LEN - 1];
-            }
-            case 'T': {
-              return &DEFAULT_ISO_DATE_TIME_CTX
-                  [len - DEFAULT_SHORT_DATE_TIME_FMT_LEN - 1];
+        default: {
+          // There is likely a fractional component that's below the expected 9 chars.
+          // We will need to work out which default context to use that corresponds to
+          // the fractional length in the string.
+          if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN)
+              && LIKELY(str[19] == '.') && LIKELY(str[13] == ':')) {
+            switch (str[10]) {
+              case ' ': {
+                return &DEFAULT_DATE_TIME_CTX[len - DEFAULT_SHORT_DATE_TIME_FMT_LEN - 1];
+              }
+              case 'T': {
+                return &DEFAULT_ISO_DATE_TIME_CTX
+                    [len - DEFAULT_SHORT_DATE_TIME_FMT_LEN - 1];
+              }
             }
           }
+          break;
         }
-        break;
       }
     }
   }
-
   return nullptr;
 }
 
diff --git a/be/src/runtime/datetime-simple-date-format-parser.h b/be/src/runtime/datetime-simple-date-format-parser.h
index 58b233b..5fa5ba7 100644
--- a/be/src/runtime/datetime-simple-date-format-parser.h
+++ b/be/src/runtime/datetime-simple-date-format-parser.h
@@ -105,12 +105,10 @@ public:
   /// len -- length of the string to parse (must be > 0)
   /// accept_time_toks -- if true, time tokens are accepted. Otherwise time tokens are
   /// rejected.
-  /// accept_time_toks_only -- if true, time tokens without date tokens are accepted.
-  /// Otherwise, they are rejected.
   /// Return the corresponding default format context if parsing succeeded, or nullptr
   /// otherwise.
   static const DateTimeFormatContext* GetDefaultFormatContext(const char* str, int len,
-      bool accept_time_toks, bool accept_time_toks_only);
+      bool accept_time_toks);
 
   /// Initialize the default format contexts. This *must* be called before using
   /// GetDefaultFormatContext().
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index aa72598..2b1447b 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -106,9 +106,7 @@ bool TimestampParser::ParseSimpleDateFormat(const char* str, int len,
       SimpleDateFormatTokenizer::DEFAULT_DATE_TIME_FMT_LEN);
   // Determine the default formatting context that's required for parsing.
   const DateTimeFormatContext* dt_ctx =
-      SimpleDateFormatTokenizer::GetDefaultFormatContext(str, default_fmt_len, true,
-          true);
-
+      SimpleDateFormatTokenizer::GetDefaultFormatContext(str, default_fmt_len, true);
   if (dt_ctx != nullptr) {
     return ParseSimpleDateFormat(str, default_fmt_len, *dt_ctx, d, t);
   }
diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h
index ca71836..2630ecd 100644
--- a/be/src/runtime/timestamp-parse-util.h
+++ b/be/src/runtime/timestamp-parse-util.h
@@ -35,11 +35,10 @@ namespace impala {
 class TimestampParser {
  public:
   /// Parse a default date/time string. The default timestamp format is:
-  /// yyyy-MM-dd HH:mm:ss.SSSSSSSSS or yyyy-MM-ddTHH:mm:ss.SSSSSSSSS. Either just the
-  /// date or just the time may be specified. All components are required in either the
+  /// yyyy-MM-dd HH:mm:ss.SSSSSSSSS or yyyy-MM-ddTHH:mm:ss.SSSSSSSSS. Just the
+  /// date may be specified. All components are required in either the
   /// date or time except for the fractional seconds following the period. In the case
-  /// of just a date, the time will be set to 00:00:00. In the case of just a time, the
-  /// date will be set to invalid.
+  /// of just a date, the time will be set to 00:00:00.
   /// str -- valid pointer to the string to parse
   /// len -- length of the string to parse (must be > 0)
   /// d -- the date value where the results of the parsing will be placed
@@ -50,7 +49,7 @@ class TimestampParser {
 
   /// Parse a date/time string. The data must adhere to SimpleDateFormat, otherwise it
   /// will be rejected i.e. no missing tokens. In the case of just a date, the time will
-  /// be set to 00:00:00. In the case of just a time, the date will be set to invalid.
+  /// be set to 00:00:00.
   /// str -- valid pointer to the string to parse
   /// len -- length of the string to parse (must be > 0)
   /// dt_ctx -- date/time format context (must contain valid tokens)