You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/07/15 18:00:50 UTC
[impala] 01/03: IMPALA-9941: ExprTest.CastExprs fails when running
with ASAN fix
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 6d1a707cf50c2297972a69a7c0469afe4a78d739
Author: Adam Tamas <ta...@cloudera.com>
AuthorDate: Fri Jul 10 17:19:14 2020 +0200
IMPALA-9941: ExprTest.CastExprs fails when running with ASAN fix
Fixed the issue with the ASAN build where too short tokens in
timestamps fails because of indexing out of bounds because of a
missing check.
Updated some missed files connected to the drop of dateless timestamps
(IMPALA-9531) regarding tests, comments.
Testing:
- Ran CORE tests
- Ran ASAN with EE_TEST_SHARDS=6 (with the help of IMPALA-9887)
Change-Id: I75bd47b6627a2e338c46dc354f7e67d34c1abbcf
Reviewed-on: http://gerrit.cloudera.org:8080/16179
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exprs/expr-test.cc | 23 +++---
be/src/runtime/date-parse-util.cc | 2 +-
.../runtime/datetime-simple-date-format-parser.cc | 89 +++++++++++-----------
.../runtime/datetime-simple-date-format-parser.h | 4 +-
be/src/runtime/timestamp-parse-util.cc | 4 +-
be/src/runtime/timestamp-parse-util.h | 9 +--
6 files changed, 63 insertions(+), 68 deletions(-)
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 4217ec8..b84382a 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3238,16 +3238,15 @@ TEST_P(ExprTest, CastExprs) {
TimestampValue::ParseSimpleDateFormat("2001-01-21 01:05:31.123450000"));
TestTimestampValue("cast('2001-1-21 1:5:31.12345678910111213' as timestamp)",
TimestampValue::ParseSimpleDateFormat("2001-01-21 01:05:31.123456789"));
- TestTimestampValue("cast('1:05:1.12' as timestamp)",
- TimestampValue::ParseSimpleDateFormat("01:05:01.120000000"));
- TestTimestampValue("cast('1:05:1' as timestamp)",
- TimestampValue::ParseSimpleDateFormat("01:05:01"));
TestTimestampValue("cast(' 2001-01-9 1:05:1 ' as timestamp)",
TimestampValue::ParseSimpleDateFormat("2001-01-09 01:05:01"));
TestIsNull("cast('2001-6' as timestamp)", TYPE_TIMESTAMP);
TestIsNull("cast('01-1-21' as timestamp)", TYPE_TIMESTAMP);
TestIsNull("cast('2001-1-21 12:5:3 AM' as timestamp)", TYPE_TIMESTAMP);
TestIsNull("cast('1:05:31.123456foo' as timestamp)", TYPE_TIMESTAMP);
+ TestIsNull("cast('1:05:1.12' as timestamp)", TYPE_TIMESTAMP);
+ TestIsNull("cast('1:05:1' as timestamp)", TYPE_TIMESTAMP);
+
TestIsNull("cast('10/feb/10' as timestamp)", TYPE_TIMESTAMP);
TestIsNull("cast('1909-foo1-2bar' as timestamp)", TYPE_TIMESTAMP);
TestIsNull("cast('1909/1-/2' as timestamp)", TYPE_TIMESTAMP);
@@ -3309,21 +3308,21 @@ TEST_P(ExprTest, CastExprs) {
TimestampValue::ParseSimpleDateFormat("2001-01-09 01:05:01"));
TestTimestampValue("cast(' \t\r\n 2001-01-09 \t\r\n ' as timestamp)",
TimestampValue::ParseSimpleDateFormat("2001-01-09"));
- TestTimestampValue("cast(' \t\r\n 01:05:01 \t\r\n ' as timestamp)",
- TimestampValue::ParseSimpleDateFormat("01:05:01"));
- TestTimestampValue("cast(' \t\r\n 01:05:01.123456789 \t\r\n ' as timestamp)",
- TimestampValue::ParseSimpleDateFormat("01:05:01.123456789"));
TestTimestampValue("cast(' \t\r\n 2001-1-9 1:5:1 \t\r\n ' as timestamp)",
TimestampValue::ParseSimpleDateFormat("2001-01-09 01:05:01"));
TestTimestampValue("cast(' \t\r\n 2001-1-9 1:5:1.12345678 \t\r\n ' as timestamp)",
TimestampValue::ParseSimpleDateFormat("2001-01-09 01:05:01.123456780"));
- TestTimestampValue("cast(' \t\r\n 1:5:1 \t\r\n ' as timestamp)",
- TimestampValue::ParseSimpleDateFormat("01:05:01"));
- TestTimestampValue("cast(' \t\r\n 1:5:1.12345678 \t\r\n ' as timestamp)",
- TimestampValue::ParseSimpleDateFormat("01:05:01.123456780"));
TestTimestampValue("cast(' \t\r\n 2001-1-9 \t\r\n ' as timestamp)",
TimestampValue::ParseSimpleDateFormat("2001-01-09"));
+ // IMPALA-9531: Dateless timestamps without format should return NULL
+ TestIsNull(
+ "cast(' \t\r\n 1:5:1.12345678 \t\r\n ' as timestamp)", TYPE_TIMESTAMP);
+ TestIsNull("cast(' \t\r\n 1:5:1 \t\r\n ' as timestamp)", TYPE_TIMESTAMP);
+ TestIsNull(
+ "cast(' \t\r\n 01:05:01.123456789 \t\r\n ' as timestamp)", TYPE_TIMESTAMP);
+ TestIsNull("cast(' \t\r\n 01:05:01 \t\r\n ' as timestamp)", TYPE_TIMESTAMP);
+
// IMPALA-6995: whitespace-only strings should return NULL.
TestIsNull("cast(' ' as timestamp)", TYPE_TIMESTAMP);
TestIsNull("cast('\n' as timestamp)", TYPE_TIMESTAMP);
diff --git a/be/src/runtime/date-parse-util.cc b/be/src/runtime/date-parse-util.cc
index 9e94f28..8a37f62 100644
--- a/be/src/runtime/date-parse-util.cc
+++ b/be/src/runtime/date-parse-util.cc
@@ -102,7 +102,7 @@ bool DateParser::ParseSimpleDateFormat(const char* str, int len, bool accept_tim
const DateTimeFormatContext* dt_ctx =
SimpleDateFormatTokenizer::GetDefaultFormatContext(str, trimmed_len,
- accept_time_toks, false);
+ accept_time_toks);
if (dt_ctx != nullptr) return ParseSimpleDateFormat(str, trimmed_len, *dt_ctx, date);
// Generating context lazily as a fall back if default formats fail.
diff --git a/be/src/runtime/datetime-simple-date-format-parser.cc b/be/src/runtime/datetime-simple-date-format-parser.cc
index 33ca531..3d6d79c 100644
--- a/be/src/runtime/datetime-simple-date-format-parser.cc
+++ b/be/src/runtime/datetime-simple-date-format-parser.cc
@@ -336,65 +336,66 @@ bool SimpleDateFormatTokenizer::TokenizeByStr( DateTimeFormatContext* dt_ctx,
}
const DateTimeFormatContext* SimpleDateFormatTokenizer::GetDefaultFormatContext(
- const char* str, int len, bool accept_time_toks, bool accept_time_toks_only) {
+ const char* str, int len, bool accept_time_toks) {
DCHECK(initialized);
DCHECK(str != nullptr);
DCHECK(len > 0);
- // Check if this string starts with a date component
- if (str[4] == '-' && str[7] == '-') {
- // Do we have a date component only?
- if (len == DEFAULT_DATE_FMT_LEN) {
- return &DEFAULT_DATE_CTX;
- }
+ if (LIKELY(len >= DEFAULT_DATE_FMT_LEN)) {
+ // Check if this string starts with a date component
+ if (str[4] == '-' && str[7] == '-') {
+ // Do we have a date component only?
+ if (len == DEFAULT_DATE_FMT_LEN) {
+ return &DEFAULT_DATE_CTX;
+ }
- // We have a time component as well. Do we accept it?
- if (!accept_time_toks) return nullptr;
-
- switch (len) {
- case DEFAULT_SHORT_DATE_TIME_FMT_LEN: {
- if (LIKELY(str[13] == ':')) {
- switch (str[10]) {
- case ' ':
- return &DEFAULT_SHORT_DATE_TIME_CTX;
- case 'T':
- return &DEFAULT_SHORT_ISO_DATE_TIME_CTX;
+ // We have a time component as well. Do we accept it?
+ if (!accept_time_toks) return nullptr;
+
+ switch (len) {
+ case DEFAULT_SHORT_DATE_TIME_FMT_LEN: {
+ if (LIKELY(str[13] == ':')) {
+ switch (str[10]) {
+ case ' ':
+ return &DEFAULT_SHORT_DATE_TIME_CTX;
+ case 'T':
+ return &DEFAULT_SHORT_ISO_DATE_TIME_CTX;
+ }
}
+ break;
}
- break;
- }
- case DEFAULT_DATE_TIME_FMT_LEN: {
- if (LIKELY(str[13] == ':')) {
- switch (str[10]) {
- case ' ':
- return &DEFAULT_DATE_TIME_CTX[9];
- case 'T':
- return &DEFAULT_ISO_DATE_TIME_CTX[9];
+ case DEFAULT_DATE_TIME_FMT_LEN: {
+ if (LIKELY(str[13] == ':')) {
+ switch (str[10]) {
+ case ' ':
+ return &DEFAULT_DATE_TIME_CTX[9];
+ case 'T':
+ return &DEFAULT_ISO_DATE_TIME_CTX[9];
+ }
}
+ break;
}
- break;
- }
- default: {
- // There is likely a fractional component that's below the expected 9 chars.
- // We will need to work out which default context to use that corresponds to
- // the fractional length in the string.
- if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN)
- && LIKELY(str[19] == '.') && LIKELY(str[13] == ':')) {
- switch (str[10]) {
- case ' ': {
- return &DEFAULT_DATE_TIME_CTX[len - DEFAULT_SHORT_DATE_TIME_FMT_LEN - 1];
- }
- case 'T': {
- return &DEFAULT_ISO_DATE_TIME_CTX
- [len - DEFAULT_SHORT_DATE_TIME_FMT_LEN - 1];
+ default: {
+ // There is likely a fractional component that's below the expected 9 chars.
+ // We will need to work out which default context to use that corresponds to
+ // the fractional length in the string.
+ if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN)
+ && LIKELY(str[19] == '.') && LIKELY(str[13] == ':')) {
+ switch (str[10]) {
+ case ' ': {
+ return &DEFAULT_DATE_TIME_CTX[len - DEFAULT_SHORT_DATE_TIME_FMT_LEN - 1];
+ }
+ case 'T': {
+ return &DEFAULT_ISO_DATE_TIME_CTX
+ [len - DEFAULT_SHORT_DATE_TIME_FMT_LEN - 1];
+ }
}
}
+ break;
}
- break;
}
}
}
-
return nullptr;
}
diff --git a/be/src/runtime/datetime-simple-date-format-parser.h b/be/src/runtime/datetime-simple-date-format-parser.h
index 58b233b..5fa5ba7 100644
--- a/be/src/runtime/datetime-simple-date-format-parser.h
+++ b/be/src/runtime/datetime-simple-date-format-parser.h
@@ -105,12 +105,10 @@ public:
/// len -- length of the string to parse (must be > 0)
/// accept_time_toks -- if true, time tokens are accepted. Otherwise time tokens are
/// rejected.
- /// accept_time_toks_only -- if true, time tokens without date tokens are accepted.
- /// Otherwise, they are rejected.
/// Return the corresponding default format context if parsing succeeded, or nullptr
/// otherwise.
static const DateTimeFormatContext* GetDefaultFormatContext(const char* str, int len,
- bool accept_time_toks, bool accept_time_toks_only);
+ bool accept_time_toks);
/// Initialize the default format contexts. This *must* be called before using
/// GetDefaultFormatContext().
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index aa72598..2b1447b 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -106,9 +106,7 @@ bool TimestampParser::ParseSimpleDateFormat(const char* str, int len,
SimpleDateFormatTokenizer::DEFAULT_DATE_TIME_FMT_LEN);
// Determine the default formatting context that's required for parsing.
const DateTimeFormatContext* dt_ctx =
- SimpleDateFormatTokenizer::GetDefaultFormatContext(str, default_fmt_len, true,
- true);
-
+ SimpleDateFormatTokenizer::GetDefaultFormatContext(str, default_fmt_len, true);
if (dt_ctx != nullptr) {
return ParseSimpleDateFormat(str, default_fmt_len, *dt_ctx, d, t);
}
diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h
index ca71836..2630ecd 100644
--- a/be/src/runtime/timestamp-parse-util.h
+++ b/be/src/runtime/timestamp-parse-util.h
@@ -35,11 +35,10 @@ namespace impala {
class TimestampParser {
public:
/// Parse a default date/time string. The default timestamp format is:
- /// yyyy-MM-dd HH:mm:ss.SSSSSSSSS or yyyy-MM-ddTHH:mm:ss.SSSSSSSSS. Either just the
- /// date or just the time may be specified. All components are required in either the
+ /// yyyy-MM-dd HH:mm:ss.SSSSSSSSS or yyyy-MM-ddTHH:mm:ss.SSSSSSSSS. Just the
+ /// date may be specified. All components are required in either the
/// date or time except for the fractional seconds following the period. In the case
- /// of just a date, the time will be set to 00:00:00. In the case of just a time, the
- /// date will be set to invalid.
+ /// of just a date, the time will be set to 00:00:00.
/// str -- valid pointer to the string to parse
/// len -- length of the string to parse (must be > 0)
/// d -- the date value where the results of the parsing will be placed
@@ -50,7 +49,7 @@ class TimestampParser {
/// Parse a date/time string. The data must adhere to SimpleDateFormat, otherwise it
/// will be rejected i.e. no missing tokens. In the case of just a date, the time will
- /// be set to 00:00:00. In the case of just a time, the date will be set to invalid.
+ /// be set to 00:00:00.
/// str -- valid pointer to the string to parse
/// len -- length of the string to parse (must be > 0)
/// dt_ctx -- date/time format context (must contain valid tokens)