You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ar...@apache.org on 2019/12/05 22:59:39 UTC
[impala] 04/06: IMPALA-8705: ISO:SQL:2016 datetime patterns -
Milestone 3
This is an automated email from the ASF dual-hosted git repository.
arodoni pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 30c7a6a18c85574ff76dc750dfef94475f1c9796
Author: Gabor Kaszab <ga...@cloudera.com>
AuthorDate: Wed Oct 16 08:49:54 2019 +0200
IMPALA-8705: ISO:SQL:2016 datetime patterns - Milestone 3
This patch adds additional datetime format tokens on top of
Milestone 1 (IMPALA-8703) and Milestone 2 (IMPALA-8704).
The tokens introduced:
- Full month name (MONTH, Month, month): In a string to datetime
conversion this token can parse textual month name into a datetime
type. In a datetime to string conversion this token gives the
textual representation of a month.
- Short month name (MON, Mon, mon): Similar to the full month name
token but this works for 3-character month names like 'JAN'.
- Full day name (DAY, Day, day): In a datetime to string conversion
this token gives the textual representation of a day like
'Tuesday.' Not suppported in a string to datetime conversion.
- Short day name (DY, Dy, dy): Similar to full day name token but
this works for 3-character day names like 'TUE'. Not suppported in
a string to datetime conversion.
- Day of week (D): In a datetime to string conversion this gives a
number in [1-7] where 1 represents Sunday. Not supported in a
string to datetime conversion.
- Quarter of year (Q): In a datetime to string conversion this gives
a number in [1-4] representing a quarter of the year. Not supported
in a string to datetime conversion.
- Week of year (WW): In a datetime to string conversion this gives a
number in [1-53] to represent the week of year where the first week
starts from 1st of January. Not supported in a string to datetime
conversion.
- Week of month (W): In a datetime to string conversion this gives a
number in [1-5] to represent the week of month where the first week
starts from the first day of the month. Not supported in a string
to datetime conversion.
Change-Id: Ic797f19a1311b54e5d00d01d0a7afe1f0f21fb8f
Reviewed-on: http://gerrit.cloudera.org:8080/14714
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/benchmarks/convert-timestamp-benchmark.cc | 1 -
be/src/benchmarks/parse-timestamp-benchmark.cc | 1 -
be/src/common/init.cc | 1 -
be/src/exprs/date-functions-ir.cc | 6 +-
be/src/exprs/timestamp-functions-ir.cc | 11 +-
be/src/exprs/timestamp-functions.cc | 48 +-
be/src/exprs/timestamp-functions.h | 37 +-
be/src/runtime/date-parse-util.cc | 35 +-
be/src/runtime/date-parse-util.h | 4 +
be/src/runtime/datetime-iso-sql-format-parser.cc | 18 +-
be/src/runtime/datetime-iso-sql-format-parser.h | 10 +-
.../runtime/datetime-iso-sql-format-tokenizer.cc | 35 +-
be/src/runtime/datetime-parser-common.cc | 123 +++-
be/src/runtime/datetime-parser-common.h | 124 +++-
.../runtime/datetime-simple-date-format-parser.cc | 39 +-
.../runtime/datetime-simple-date-format-parser.h | 9 -
be/src/runtime/timestamp-parse-util.cc | 27 +-
tests/query_test/test_cast_with_format.py | 642 ++++++++++++++++++++-
18 files changed, 1060 insertions(+), 111 deletions(-)
diff --git a/be/src/benchmarks/convert-timestamp-benchmark.cc b/be/src/benchmarks/convert-timestamp-benchmark.cc
index 104b162..c90de38 100644
--- a/be/src/benchmarks/convert-timestamp-benchmark.cc
+++ b/be/src/benchmarks/convert-timestamp-benchmark.cc
@@ -707,7 +707,6 @@ int main(int argc, char* argv[]) {
DCHECK(PTR_CCTZ_LOCAL_TZ != nullptr);
SimpleDateFormatTokenizer::InitCtx();
- SimpleDateFormatParser::InitCtx();
const vector<TimestampValue> tsvalue_data =
AddTestDataDateTimes(1000, "1953-04-22 01:02:03");
diff --git a/be/src/benchmarks/parse-timestamp-benchmark.cc b/be/src/benchmarks/parse-timestamp-benchmark.cc
index 078192a..c7ca51a 100644
--- a/be/src/benchmarks/parse-timestamp-benchmark.cc
+++ b/be/src/benchmarks/parse-timestamp-benchmark.cc
@@ -235,7 +235,6 @@ int main(int argc, char **argv) {
cout << Benchmark::GetMachineInfo() << endl;
SimpleDateFormatTokenizer::InitCtx();
- SimpleDateFormatParser::InitCtx();
TestData dates, times, datetimes, tzdatetimes;
diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index dec323c..e395afc 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -310,7 +310,6 @@ void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm,
#endif
impala::InitThreading();
impala::datetime_parse_util::SimpleDateFormatTokenizer::InitCtx();
- impala::datetime_parse_util::SimpleDateFormatParser::InitCtx();
impala::SeedOpenSSLRNG();
ABORT_IF_ERROR(impala::InitAuth(argv[0]));
diff --git a/be/src/exprs/date-functions-ir.cc b/be/src/exprs/date-functions-ir.cc
index 1005ab4..8e16fb5 100644
--- a/be/src/exprs/date-functions-ir.cc
+++ b/be/src/exprs/date-functions-ir.cc
@@ -111,7 +111,8 @@ StringVal DateFunctions::LongDayName(FunctionContext* context, const DateVal& d_
DCHECK_GE(wday, 0);
DCHECK_LE(wday, 6);
wday = (wday + 1) % 7;
- const string& day_name = TimestampFunctions::DAYNAME_ARRAY[wday];
+ const string& day_name =
+ TimestampFunctions::DAY_NAMES[TimestampFunctions::CAPITALIZED][wday];
return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(day_name.data())),
day_name.size());
}
@@ -125,7 +126,8 @@ StringVal DateFunctions::LongMonthName(FunctionContext* context, const DateVal&
DCHECK_GE(month, 1);
DCHECK_LE(month, 12);
- const string& mn = TimestampFunctions::MONTHNAME_ARRAY[month - 1];
+ const string& mn =
+ TimestampFunctions::MONTH_NAMES[TimestampFunctions::CAPITALIZED][month - 1];
return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(mn.data())), mn.size());
}
diff --git a/be/src/exprs/timestamp-functions-ir.cc b/be/src/exprs/timestamp-functions-ir.cc
index 3d27340..240fcd1 100644
--- a/be/src/exprs/timestamp-functions-ir.cc
+++ b/be/src/exprs/timestamp-functions-ir.cc
@@ -450,7 +450,7 @@ string TimestampFunctions::ShortDayName(FunctionContext* context,
IntVal dow = DayOfWeek(context, ts);
DCHECK_GT(dow.val, 0);
DCHECK_LT(dow.val, 8);
- return DAY_ARRAY[dow.val - 1];
+ return SHORT_DAY_NAMES[CAPITALIZED][dow.val - 1];
}
StringVal TimestampFunctions::LongDayName(FunctionContext* context,
@@ -459,8 +459,9 @@ StringVal TimestampFunctions::LongDayName(FunctionContext* context,
IntVal dow = DayOfWeek(context, ts);
DCHECK_GT(dow.val, 0);
DCHECK_LT(dow.val, 8);
- const string& day_name = DAYNAME_ARRAY[dow.val - 1];
- return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(day_name.data())), day_name.size());
+ const string& day_name = DAY_NAMES[CAPITALIZED][dow.val - 1];
+ return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(day_name.data())),
+ day_name.size());
}
string TimestampFunctions::ShortMonthName(FunctionContext* context,
@@ -469,7 +470,7 @@ string TimestampFunctions::ShortMonthName(FunctionContext* context,
IntVal mth = Month(context, ts);
DCHECK_GT(mth.val, 0);
DCHECK_LT(mth.val, 13);
- return MONTH_ARRAY[mth.val - 1];
+ return SHORT_MONTH_NAMES[CAPITALIZED][mth.val - 1];
}
StringVal TimestampFunctions::LongMonthName(FunctionContext* context,
@@ -478,7 +479,7 @@ StringVal TimestampFunctions::LongMonthName(FunctionContext* context,
IntVal mth = Month(context, ts);
DCHECK_GT(mth.val, 0);
DCHECK_LT(mth.val, 13);
- const string& mn = MONTHNAME_ARRAY[mth.val - 1];
+ const string& mn = MONTH_NAMES[CAPITALIZED][mth.val - 1];
return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(mn.data())), mn.size());
}
diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc
index 231c853..92c2d2e 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -34,15 +34,45 @@ namespace impala {
using namespace datetime_parse_util;
-const string TimestampFunctions::DAY_ARRAY[7] = {"Sun", "Mon", "Tue", "Wed", "Thu",
- "Fri", "Sat"};
-const string TimestampFunctions::DAYNAME_ARRAY[7] = {"Sunday", "Monday", "Tuesday",
- "Wednesday", "Thursday", "Friday", "Saturday"};
-const string TimestampFunctions::MONTH_ARRAY[12] = {"Jan", "Feb", "Mar", "Apr", "May",
- "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
-const string TimestampFunctions::MONTHNAME_ARRAY[12] = {"January", "February", "March",
- "April", "May", "June", "July", "August", "September", "October", "November",
- "December"};
+const string TimestampFunctions::SHORT_MONTH_NAMES[3][12] = {
+ {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"},
+ {"jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"},
+ {"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"}
+};
+
+const string TimestampFunctions::MONTH_NAMES[3][12] = {
+ {"January", "February", "March", "April", "May", "June", "July", "August",
+ "September", "October", "November", "December"},
+ {"january", "february", "march", "april", "may", "june", "july", "august",
+ "september", "october", "november", "december"},
+ {"JANUARY", "FEBRUARY", "MARCH", "APRIL", "MAY", "JUNE", "JULY", "AUGUST",
+ "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER"}};
+
+const string TimestampFunctions::MONTH_NAMES_PADDED[3][12] = {
+ {"January ", "February ", "March ", "April ", "May ", "June ",
+ "July ", "August ", "September", "October ", "November ", "December "},
+ {"january ", "february ", "march ", "april ", "may ", "june ",
+ "july ", "august ", "september", "october ", "november ", "december "},
+ {"JANUARY ", "FEBRUARY ", "MARCH ", "APRIL ", "MAY ", "JUNE ",
+ "JULY ", "AUGUST ", "SEPTEMBER", "OCTOBER ", "NOVEMBER ", "DECEMBER "}};
+
+const string TimestampFunctions::SHORT_DAY_NAMES[3][7] = {
+ {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"},
+ {"sun", "mon", "tue", "wed", "thu", "fri", "sat"},
+ {"SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"}};
+
+const string TimestampFunctions::DAY_NAMES[3][7] = {
+ {"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"},
+ {"sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"},
+ {"SUNDAY", "MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", "SATURDAY"}};
+
+const string TimestampFunctions::DAY_NAMES_PADDED[3][7] = {
+ {"Sunday ", "Monday ", "Tuesday ", "Wednesday", "Thursday ", "Friday ",
+ "Saturday "},
+ {"sunday ", "monday ", "tuesday ", "wednesday", "thursday ", "friday ",
+ "saturday "},
+ {"SUNDAY ", "MONDAY ", "TUESDAY ", "WEDNESDAY", "THURSDAY ", "FRIDAY ",
+ "SATURDAY "}};
// Sunday is mapped to 0 and Saturday is mapped to 6.
const map<string, int> TimestampFunctions::DAYNAME_MAP = {
diff --git a/be/src/exprs/timestamp-functions.h b/be/src/exprs/timestamp-functions.h
index e1d3773..992ae34 100644
--- a/be/src/exprs/timestamp-functions.h
+++ b/be/src/exprs/timestamp-functions.h
@@ -67,13 +67,32 @@ class TimestampFunctions {
static const int64_t MAX_MILLI_INTERVAL = MAX_SEC_INTERVAL * 1000;
static const int64_t MAX_MICRO_INTERVAL = MAX_MILLI_INTERVAL * 1000;
- /// Static result values for ShortMonthName() function.
- /// Short month names are also used in DateParser.
- static const std::string MONTH_ARRAY[12];
- /// Static result values for ShortDayName() and LongMonthName() functions.
- /// These are also used in DateFunctions.
- static const std::string DAYNAME_ARRAY[7];
- static const std::string MONTHNAME_ARRAY[12];
+ /// Use this as the first index for SHORT_MONTH_NAMES, MONTH_NAMES etc.
+ enum TextCase {CAPITALIZED, LOWERCASE, UPPERCASE};
+
+ /// Contains the short names of the months in 3 different case: Capitalized, full
+ /// lowercase and full uppercase.
+ static const std::string SHORT_MONTH_NAMES[3][12];
+
+ /// Contains the full names of the months in 3 different case: Capitalized, full
+ /// lowercase and full uppercase. E.g. "January", "january", "JANUARY", etc.
+ static const std::string MONTH_NAMES[3][12];
+
+ /// Similar to MONTH_NAMES but here the values are padded with spaces to the maximum
+ /// length.
+ static const std::string MONTH_NAMES_PADDED[3][12];
+
+ /// Contains the short day names in 3 different case: Capitalized, full
+ /// lowercase and full uppercase. E.g. "Sun", "sun", "SUN", etc.
+ static const std::string SHORT_DAY_NAMES[3][7];
+
+ /// Contains the full names of the days in 3 different case: Capitalized, full
+ /// lowercase and full uppercase. E.g. "Sunday", "sunday", "SUNDAY", etc.
+ static const std::string DAY_NAMES[3][7];
+
+ /// Similar to DAY_NAMES but here the values are padded with spaces to the maximum
+ /// length.
+ static const std::string DAY_NAMES_PADDED[3][7];
/// Maps full and abbreviated lowercase names of day-of-week to an int in the 0-6 range.
/// Sunday is mapped to 0 and Saturday is mapped to 6.
@@ -238,10 +257,6 @@ class TimestampFunctions {
/// Helper function to check date/time format strings.
/// TODO: eventually return format converted from Java to Boost.
static StringValue* CheckFormat(StringValue* format);
-
- private:
- /// Static result values for DayName() function.
- static const std::string DAY_ARRAY[7];
};
} // namespace impala
diff --git a/be/src/runtime/date-parse-util.cc b/be/src/runtime/date-parse-util.cc
index ef3b041..579a895 100644
--- a/be/src/runtime/date-parse-util.cc
+++ b/be/src/runtime/date-parse-util.cc
@@ -20,7 +20,6 @@
#include <boost/date_time/gregorian/gregorian.hpp>
#include "cctz/civil_time.h"
-#include "exprs/timestamp-functions.h"
#include "runtime/datetime-iso-sql-format-parser.h"
#include "runtime/datetime-simple-date-format-parser.h"
#include "runtime/string-value.inline.h"
@@ -152,9 +151,26 @@ string DateParser::Format(const DateTimeFormatContext& dt_ctx, const DateValue&
}
break;
}
+ case QUARTER_OF_YEAR: {
+ num_val = GetQuarter(month);
+ break;
+ }
case MONTH_IN_YEAR: num_val = month; break;
- case MONTH_IN_YEAR_SLT: {
- result.append(TimestampFunctions::MONTH_ARRAY[month - 1]);
+ case MONTH_NAME:
+ case MONTH_NAME_SHORT: {
+ result.append(FormatMonthName(month, tok));
+ break;
+ }
+ case WEEK_OF_YEAR: {
+ num_val = GetWeekOfYear(year, month, day);
+ break;
+ }
+ case WEEK_OF_MONTH: {
+ num_val = GetWeekOfMonth(day);
+ break;
+ }
+ case DAY_OF_WEEK: {
+ num_val = GetDayOfWeek(date);
break;
}
case DAY_IN_MONTH: num_val = day; break;
@@ -162,6 +178,11 @@ string DateParser::Format(const DateTimeFormatContext& dt_ctx, const DateValue&
num_val = GetDayInYear(year, month, day);
break;
}
+ case DAY_NAME:
+ case DAY_NAME_SHORT: {
+ result.append(FormatDayName(GetDayOfWeek(date), tok));
+ break;
+ }
case SEPARATOR: {
result.append(tok.val, tok.len);
break;
@@ -188,4 +209,12 @@ bool DateParser::IndicateDateParseFailure(DateValue* date) {
return false;
}
+int DateParser::GetDayOfWeek(const DateValue& date) {
+ DCHECK(date.IsValid());
+ // DateValue::WeekDay() returns [0-6] where Monday is 0.
+ int dow = date.WeekDay();
+ // Convert to [1-7] where Sunday is 1.
+ return (dow + 1) % 7 + 1;
+}
+
}
diff --git a/be/src/runtime/date-parse-util.h b/be/src/runtime/date-parse-util.h
index c8b8765..a6174a7 100644
--- a/be/src/runtime/date-parse-util.h
+++ b/be/src/runtime/date-parse-util.h
@@ -80,6 +80,10 @@ class DateParser {
/// Helper for parse functions to produce return value and set output parameter to an
/// invalid DateValue when parsing fails.
static bool IndicateDateParseFailure(DateValue* date);
+
+ /// Returns a number between 1 and 7 that represents the day of the week where Sunday
+ /// is 1.
+ static int GetDayOfWeek(const DateValue& date);
};
}
diff --git a/be/src/runtime/datetime-iso-sql-format-parser.cc b/be/src/runtime/datetime-iso-sql-format-parser.cc
index 976f39a..dae2b99 100644
--- a/be/src/runtime/datetime-iso-sql-format-parser.cc
+++ b/be/src/runtime/datetime-iso-sql-format-parser.cc
@@ -74,7 +74,8 @@ bool IsoSqlFormatParser::ParseDateTime(const char* input_str, int input_len,
continue;
}
- const char* token_end_pos = FindEndOfToken(current_pos, end_pos - current_pos, *tok);
+ const char* token_end_pos =
+ FindEndOfToken(current_pos, end_pos - current_pos, *tok, dt_ctx.fx_modifier);
if (token_end_pos == nullptr) return false;
int token_len = token_end_pos - current_pos;
@@ -106,6 +107,14 @@ bool IsoSqlFormatParser::ParseDateTime(const char* input_str, int input_len,
}
break;
}
+ case MONTH_NAME:
+ case MONTH_NAME_SHORT: {
+ if (!ParseMonthNameToken(*tok, current_pos, &token_end_pos, dt_ctx.fx_modifier,
+ &result->month)) {
+ return false;
+ }
+ break;
+ }
case DAY_IN_MONTH: {
if (!ParseAndValidate(current_pos, token_len, 1, 31, &result->day)) return false;
break;
@@ -272,7 +281,7 @@ bool IsoSqlFormatParser::ProcessSeparatorSequence(const char** current_pos,
}
const char* IsoSqlFormatParser::FindEndOfToken(const char* input_str,
- int input_len, const DateTimeFormatToken& tok) {
+ int input_len, const DateTimeFormatToken& tok, bool fx_provided) {
DCHECK(input_str != nullptr);
DCHECK(input_len >= 0);
@@ -286,6 +295,11 @@ const char* IsoSqlFormatParser::FindEndOfToken(const char* input_str,
return ParseMeridiemIndicatorFromInput(input_str, input_len);
}
+ if (tok.type == MONTH_NAME && fx_provided && !tok.fm_modifier) {
+ if (input_len < MAX_MONTH_NAME_LENGTH) return nullptr;
+ return input_str + MAX_MONTH_NAME_LENGTH;
+ }
+
int max_tok_len = min(input_len, tok.len);
const char* start_of_token = input_str;
if (tok.type == TIMEZONE_HOUR) {
diff --git a/be/src/runtime/datetime-iso-sql-format-parser.h b/be/src/runtime/datetime-iso-sql-format-parser.h
index aed93d7..5491abf 100644
--- a/be/src/runtime/datetime-iso-sql-format-parser.h
+++ b/be/src/runtime/datetime-iso-sql-format-parser.h
@@ -42,12 +42,16 @@ public:
private:
/// 'input_str' points to a location in the input string where the parsing stands now.
/// Given 'tok' as the next token in the list of tokens created by the tokenizer this
- /// functions finds the end of the next token.
+ /// function finds the end of the next token.
/// 'input_len' is used for stopping when we reach the end of the input string.
- /// Return a pointer pointing one position after the last character of the found token.
+ /// Returns a pointer pointing one position after the last character of the found token.
/// If can't identify the next token then returns nullptr.
+ /// If a MONTH_NAME token is not followed by a separator then the end of the month name
+ /// in the input can't be found by this function. In this case MAX_MONTH_NAME_LENGTH is
+ /// expected as the lenght of the month token and later on ParseMonthNameToken() will
+ /// adjust the end of the token.
static const char* FindEndOfToken(const char* input_str, int input_len,
- const DateTimeFormatToken& tok) WARN_UNUSED_RESULT;
+ const DateTimeFormatToken& tok, bool fx_provided) WARN_UNUSED_RESULT;
/// Has to call this function when 'input_str' points to the fist character of a
/// meridiem indicator. Identifies the last position of a meridiem indicator and returns
diff --git a/be/src/runtime/datetime-iso-sql-format-tokenizer.cc b/be/src/runtime/datetime-iso-sql-format-tokenizer.cc
index fa584e2..124f03a 100644
--- a/be/src/runtime/datetime-iso-sql-format-tokenizer.cc
+++ b/be/src/runtime/datetime-iso-sql-format-tokenizer.cc
@@ -62,13 +62,22 @@ const unordered_map<string, IsoSqlFormatTokenizer::TokenItem>
{"T", IsoSqlFormatTokenizer::TokenItem(ISO8601_TIME_INDICATOR, false, true)},
{"Z", IsoSqlFormatTokenizer::TokenItem(ISO8601_ZULU_INDICATOR, false, true)},
{"FM", IsoSqlFormatTokenizer::TokenItem(FM_MODIFIER, false, false)},
- {"FX", IsoSqlFormatTokenizer::TokenItem(FX_MODIFIER, false, false)}
+ {"FX", IsoSqlFormatTokenizer::TokenItem(FX_MODIFIER, false, false)},
+ {"MONTH", IsoSqlFormatTokenizer::TokenItem(MONTH_NAME, true, false)},
+ {"MON", IsoSqlFormatTokenizer::TokenItem(MONTH_NAME_SHORT, true, false)},
+ {"DAY", IsoSqlFormatTokenizer::TokenItem(DAY_NAME, true, false)},
+ {"DY", IsoSqlFormatTokenizer::TokenItem(DAY_NAME_SHORT, true, false)},
+ {"D", IsoSqlFormatTokenizer::TokenItem(DAY_OF_WEEK, true, false)},
+ {"Q", IsoSqlFormatTokenizer::TokenItem(QUARTER_OF_YEAR, true, false)},
+ {"WW", IsoSqlFormatTokenizer::TokenItem(WEEK_OF_YEAR, true, false)},
+ {"W", IsoSqlFormatTokenizer::TokenItem(WEEK_OF_MONTH, true, false)}
});
const unordered_map<string, int> IsoSqlFormatTokenizer::SPECIAL_LENGTHS({
- {"HH", 2}, {"HH12", 2}, {"HH24", 2}, {"FF", 9}, {"FF1", 1}, {"FF2", 2}, {"FF3", 3},
- {"FF4", 4}, {"FF5", 5}, {"FF6", 6}, {"FF7", 7}, {"FF8", 8}, {"FF9", 9}, {"TZM", 2}
-});
+ {"HH12", 2}, {"HH24", 2}, {"FF", 9}, {"FF1", 1}, {"FF2", 2}, {"FF4", 4}, {"FF5", 5},
+ {"FF6", 6}, {"FF7", 7}, {"FF8", 8}, {"FF9", 9}, {"TZM", 2},
+ {"MONTH", MAX_MONTH_NAME_LENGTH}, {"DAY", MAX_DAY_NAME_LENGTH},
+ {"DY", SHORT_DAY_NAME_LENGTH}});
const unsigned IsoSqlFormatTokenizer::MAX_TOKEN_SIZE = 5;
@@ -186,10 +195,22 @@ FormatTokenizationResult IsoSqlFormatTokenizer::CheckIncompatibilities() const {
return YEAR_WITH_ROUNDED_YEAR_ERROR;
}
- if (IsUsedToken("DDD") && (IsUsedToken("DD") || IsUsedToken("MM"))) {
+ if (IsUsedToken("Q")) return QUARTER_NOT_ALLOWED_FOR_PARSING;
+
+ short provided_month_tokens = IsUsedToken("MM") + IsUsedToken("MONTH") +
+ IsUsedToken("MON");
+ if (provided_month_tokens > 1) return CONFLICTING_MONTH_TOKENS_ERROR;
+
+ if (IsUsedToken("WW") || IsUsedToken("W")) return WEEK_NUMBER_NOT_ALLOWED_FOR_PARSING;
+
+ if (IsUsedToken("DDD") && (IsUsedToken("DD") || provided_month_tokens == 1)) {
return DAY_OF_YEAR_TOKEN_CONFLICT;
}
+ if (IsUsedToken("D")) return DAY_OF_WEEK_NOT_ALLOWED_FOR_PARSING;
+
+ if (IsUsedToken("DAY") || IsUsedToken("DY")) return DAY_NAME_NOT_ALLOWED_FOR_PARSING;
+
short provided_hour_tokens = IsUsedToken("HH") + IsUsedToken("HH12") +
IsUsedToken("HH24");
if (provided_hour_tokens > 1) {
@@ -198,9 +219,7 @@ FormatTokenizationResult IsoSqlFormatTokenizer::CheckIncompatibilities() const {
short provided_median_tokens = IsUsedToken("AM") + IsUsedToken("A.M.") +
IsUsedToken("PM") + IsUsedToken("P.M.");
- if (provided_median_tokens > 1) {
- return CONFLICTING_MERIDIEM_TOKENS_ERROR;
- }
+ if (provided_median_tokens > 1) return CONFLICTING_MERIDIEM_TOKENS_ERROR;
if (IsMeridiemIndicatorProvided() && IsUsedToken("HH24")) {
return MERIDIEM_CONFLICTS_WITH_HOUR_ERROR;
diff --git a/be/src/runtime/datetime-parser-common.cc b/be/src/runtime/datetime-parser-common.cc
index bf68425..5ff3430 100644
--- a/be/src/runtime/datetime-parser-common.cc
+++ b/be/src/runtime/datetime-parser-common.cc
@@ -17,11 +17,16 @@
#include "datetime-parser-common.h"
+#include <boost/algorithm/string/classification.hpp>
+#include <boost/algorithm/string/trim.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
+#include "gutil/strings/ascii_ctype.h"
+#include "runtime/datetime-iso-sql-format-tokenizer.h"
#include "runtime/string-value.h"
#include "util/string-parser.h"
+using boost::algorithm::is_any_of;
using std::string;
using std::unordered_set;
@@ -73,7 +78,10 @@ void ReportBadFormat(FunctionContext* context, FormatTokenizationResult error_ty
ss << "PARSE ERROR: Both year and round year are provided";
break;
case CONFLICTING_YEAR_TOKENS_ERROR:
- ss << "PARSE ERROR: Multiple year token provided";
+ ss << "PARSE ERROR: Multiple year tokens provided";
+ break;
+ case CONFLICTING_MONTH_TOKENS_ERROR:
+ ss << "PARSE ERROR: Multiple month tokens provided";
break;
case DAY_OF_YEAR_TOKEN_CONFLICT:
ss << "PARSE ERROR: Day of year provided with day or month token";
@@ -118,6 +126,22 @@ void ReportBadFormat(FunctionContext* context, FormatTokenizationResult error_ty
case MISPLACED_FX_MODIFIER_ERROR:
ss << "PARSE ERROR: FX modifier should be at the beginning of the format string.";
break;
+ case QUARTER_NOT_ALLOWED_FOR_PARSING:
+ ss << "PARSE_ERROR: Quarter token is not allowed in a string to datetime "
+ "conversion";
+ break;
+ case DAY_OF_WEEK_NOT_ALLOWED_FOR_PARSING:
+ ss << "PARSE_ERROR: Day of week token is not allowed in a string to datetime "
+ "conversion";
+ break;
+ case DAY_NAME_NOT_ALLOWED_FOR_PARSING:
+ ss << "PARSE_ERROR: Day name token is not allowed in a string to datetime "
+ "conversion";
+ break;
+ case WEEK_NUMBER_NOT_ALLOWED_FOR_PARSING:
+ ss << "PARSE_ERROR: Week number token is not allowed in a string to datetime "
+ "conversion";
+ break;
default:
const StringValue& fmt = StringValue::FromStringVal(format);
ss << "Bad date/time conversion format: " << fmt.DebugString();
@@ -160,6 +184,56 @@ bool ParseFractionToken(const char* token, int token_len,
return true;
}
+int GetQuarter(int month) {
+ DCHECK(month > 0 && month <= 12);
+ return (month - 1) / 3 + 1;
+}
+
+bool ParseMonthNameToken(const DateTimeFormatToken& tok, const char* token_start,
+ const char** token_end, bool fx_modifier, int* month) {
+ DCHECK(token_start != nullptr);
+ DCHECK(tok.type == MONTH_NAME || tok.type == MONTH_NAME_SHORT);
+ DCHECK(month != nullptr);
+ DCHECK(token_end != nullptr && *token_end != nullptr);
+ DCHECK(token_start <= *token_end);
+ int token_len = *token_end - token_start;
+ if (token_len < SHORT_MONTH_NAME_LENGTH) return false;
+
+ string buff(token_start, token_len);
+ boost::to_lower(buff);
+ const string& prefix = buff.substr(0, SHORT_MONTH_NAME_LENGTH);
+ const auto month_iter = MONTH_PREFIX_TO_SUFFIX.find(prefix);
+ if (UNLIKELY(month_iter == MONTH_PREFIX_TO_SUFFIX.end())) return false;
+ if (tok.type == MONTH_NAME_SHORT) {
+ *month = month_iter->second.second;
+ return true;
+ }
+
+ DCHECK(tok.type == MONTH_NAME);
+ if (fx_modifier && !tok.fm_modifier) {
+ DCHECK(buff.length() == MAX_MONTH_NAME_LENGTH);
+ trim_right_if(buff, is_any_of(" "));
+ }
+
+ // Check if the remaining characters match.
+ const string& expected_suffix = month_iter->second.first;
+ if (buff.length() - SHORT_MONTH_NAME_LENGTH < expected_suffix.length()) return false;
+ const char* actual_suffix = buff.c_str() + SHORT_MONTH_NAME_LENGTH;
+ if (strncmp(actual_suffix, expected_suffix.c_str(), expected_suffix.length()) != 0) {
+ return false;
+ }
+ *month = month_iter->second.second;
+
+ // If the end of the month token wasn't identified because it wasn't followed by a
+ // separator then the end of the month token has to be adjusted.
+ if (prefix.length() + expected_suffix.length() < buff.length()) {
+ if (fx_modifier && !tok.fm_modifier) return false;
+ *token_end = token_start + prefix.length() + expected_suffix.length();
+ }
+
+ return true;
+}
+
int GetDayInYear(int year, int month, int day_in_month) {
DCHECK(month >= 1 && month <= 12);
const vector<int>& month_ranges = IsLeapYear(year) ? LEAP_YEAR_MONTH_RANGES :
@@ -221,6 +295,53 @@ string FormatTextToken(const DateTimeFormatToken& tok) {
return result;
}
+const string& FormatMonthName(int num_of_month, const DateTimeFormatToken& tok) {
+ DCHECK(num_of_month >= 1 && num_of_month <= 12);
+ DCHECK((tok.type == MONTH_NAME && tok.len == MAX_MONTH_NAME_LENGTH) ||
+ (tok.type == MONTH_NAME_SHORT && tok.len == SHORT_MONTH_NAME_LENGTH));
+ TimestampFunctions::TextCase text_case = GetOutputCase(tok);
+ if (tok.type == MONTH_NAME_SHORT) {
+ return TimestampFunctions::SHORT_MONTH_NAMES[text_case][num_of_month - 1];
+ }
+ if (tok.fm_modifier) {
+ return TimestampFunctions::MONTH_NAMES[text_case][num_of_month - 1];
+ }
+ return TimestampFunctions::MONTH_NAMES_PADDED[text_case][num_of_month - 1];
+}
+
+const string& FormatDayName(int day, const DateTimeFormatToken& tok) {
+ DCHECK(day >= 1 && day <= 7);
+ DCHECK((tok.type == DAY_NAME && tok.len == MAX_DAY_NAME_LENGTH) ||
+ (tok.type == DAY_NAME_SHORT && tok.len == SHORT_DAY_NAME_LENGTH));
+ TimestampFunctions::TextCase text_case = GetOutputCase(tok);
+ if (tok.type == DAY_NAME_SHORT) {
+ return TimestampFunctions::SHORT_DAY_NAMES[text_case][day - 1];
+ }
+ if (tok.fm_modifier) return TimestampFunctions::DAY_NAMES[text_case][day - 1];
+ return TimestampFunctions::DAY_NAMES_PADDED[text_case][day - 1];
+}
+
+TimestampFunctions::TextCase GetOutputCase(const DateTimeFormatToken& tok) {
+ DCHECK(tok.type == DAY_NAME || tok.type == DAY_NAME_SHORT || tok.type == MONTH_NAME ||
+ tok.type == MONTH_NAME_SHORT);
+ DCHECK(tok.val != nullptr);
+ DCHECK(tok.len >= SHORT_DAY_NAME_LENGTH && tok.len >= SHORT_MONTH_NAME_LENGTH);
+ if (strncmp(tok.val, "MMM", 3) == 0) return TimestampFunctions::CAPITALIZED;
+ if (ascii_islower(*tok.val)) {
+ return TimestampFunctions::LOWERCASE;
+ } else if (ascii_isupper(*(tok.val + 1))) {
+ return TimestampFunctions::UPPERCASE;
+ }
+ return TimestampFunctions::CAPITALIZED;
+}
+
+int GetWeekOfYear(int year, int month, int day) {
+ return (GetDayInYear(year, month, day) - 1) / 7 + 1;
+}
+
+int GetWeekOfMonth(int day) {
+ return (day - 1) / 7 + 1;
+}
}
}
diff --git a/be/src/runtime/datetime-parser-common.h b/be/src/runtime/datetime-parser-common.h
index 70c0eb4..7ab1a2b 100644
--- a/be/src/runtime/datetime-parser-common.h
+++ b/be/src/runtime/datetime-parser-common.h
@@ -19,9 +19,11 @@
#include <boost/date_time/posix_time/ptime.hpp>
#include "gutil/macros.h"
+#include <unordered_map>
#include <unordered_set>
#include <vector>
+#include "exprs/timestamp-functions.h"
#include "runtime/timestamp-value.h"
#include "udf/udf.h"
@@ -30,35 +32,68 @@ namespace impala {
using impala_udf::FunctionContext;
using impala_udf::StringVal;
-// Impala provides multiple algorithms to parse datetime formats:
-// - SimpleDateFormat: This is the one that is traditionally used with functions such
-// as to_timestamp() and from_timestamp().
-// - ISO SQL:2016 compliant datetime pattern matching. CAST(..FORMAT..) comes with
-// support for this pattern only.
-// This is a collection of the logic that is shared between the 2 types of pattern
-// matching including result codes, error reporting, format token types etc.
+/// Impala provides multiple algorithms to parse datetime formats:
+/// - SimpleDateFormat: This is the one that is traditionally used with functions such
+/// as to_timestamp() and from_timestamp().
+/// - ISO SQL:2016 compliant datetime pattern matching. CAST(..FORMAT..) comes with
+/// support for this pattern only.
+/// This is a collection of the logic that is shared between the 2 types of pattern
+/// matching including result codes, error reporting, format token types etc.
namespace datetime_parse_util {
-const int MONTH_LENGTHS[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
-
const int FRACTIONAL_SECOND_MAX_LENGTH = 9;
-// Describes ranges for months in a non-leap year expressed as number of days since
-// January 1.
+/// Describes ranges for months in a non-leap year expressed as number of days since
+/// January 1.
const std::vector<int> MONTH_RANGES = {
0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 };
-// Describes ranges for months in a leap year expressed as number of days since January 1.
+/// Describes ranges for months in a leap year expressed as number of days since
+/// January 1.
const std::vector<int> LEAP_YEAR_MONTH_RANGES = {
0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 };
-// Contains all the possible result codes that can come from parsing a datetime format
-// pattern.
+/// Maps the 3-letter prefix of a month name to the suffix of the month name and the
+/// ordinal number of month. The key of this map can be used to uniquely identify the
+/// month while the suffix part of the value can be used for checking if the full month
+/// name was given correctly in the input of a string to datetime conversion. The number
+/// part of the value can be used as a result of the string to datetime conversion.
+const std::unordered_map<std::string, std::pair<std::string, int>>
+ MONTH_PREFIX_TO_SUFFIX = {
+ {"jan", {"uary", 1}},
+ {"feb", {"ruary", 2}},
+ {"mar", {"ch", 3}},
+ {"apr", {"il", 4}},
+ {"may", {"", 5}},
+ {"jun", {"e", 6}},
+ {"jul", {"y", 7}},
+ {"aug", {"ust", 8}},
+ {"sep", {"tember", 9}},
+ {"oct", {"ober", 10}},
+ {"nov", {"ember", 11}},
+ {"dec", {"ember", 12}}
+};
+
+/// Length of short month names like 'JAN', 'FEB', etc.
+const int SHORT_MONTH_NAME_LENGTH = 3;
+
+/// Length of the longest month name 'SEPTEMBER'.
+const int MAX_MONTH_NAME_LENGTH = 9;
+
+/// Length of short day names like 'MON', 'TUE', etc.
+const int SHORT_DAY_NAME_LENGTH = 3;
+
+/// Length of the longest day name 'WEDNESDAY'.
+const int MAX_DAY_NAME_LENGTH = 9;
+
+/// Contains all the possible result codes that can come from parsing a datetime format
+/// pattern.
enum FormatTokenizationResult {
SUCCESS,
GENERAL_ERROR,
DUPLICATE_FORMAT,
YEAR_WITH_ROUNDED_YEAR_ERROR,
CONFLICTING_YEAR_TOKENS_ERROR,
+ CONFLICTING_MONTH_TOKENS_ERROR,
DAY_OF_YEAR_TOKEN_CONFLICT,
CONFLICTING_HOUR_TOKENS_ERROR,
CONFLICTING_MERIDIEM_TOKENS_ERROR,
@@ -72,7 +107,11 @@ enum FormatTokenizationResult {
CONFLICTING_FRACTIONAL_SECOND_TOKENS_ERROR,
TEXT_TOKEN_NOT_CLOSED,
NO_DATETIME_TOKENS_ERROR,
- MISPLACED_FX_MODIFIER_ERROR
+ MISPLACED_FX_MODIFIER_ERROR,
+ QUARTER_NOT_ALLOWED_FOR_PARSING,
+ DAY_OF_WEEK_NOT_ALLOWED_FOR_PARSING,
+ DAY_NAME_NOT_ALLOWED_FOR_PARSING,
+ WEEK_NUMBER_NOT_ALLOWED_FOR_PARSING
};
/// Holds all the token types that serve as building blocks for datetime format patterns.
@@ -82,7 +121,6 @@ enum DateTimeFormatTokenType {
YEAR,
ROUND_YEAR,
MONTH_IN_YEAR,
- MONTH_IN_YEAR_SLT,
DAY_IN_MONTH,
DAY_IN_YEAR,
HOUR_IN_DAY,
@@ -99,7 +137,15 @@ enum DateTimeFormatTokenType {
ISO8601_ZULU_INDICATOR,
TEXT,
FM_MODIFIER,
- FX_MODIFIER
+ FX_MODIFIER,
+ MONTH_NAME,
+ MONTH_NAME_SHORT,
+ DAY_NAME,
+ DAY_NAME_SHORT,
+ DAY_OF_WEEK,
+ QUARTER_OF_YEAR,
+ WEEK_OF_YEAR,
+ WEEK_OF_MONTH
};
/// Indicates whether the cast is a 'datetime to string' or a 'string to datetime' cast.
@@ -224,15 +270,31 @@ void ReportBadFormat(FunctionContext* context, FormatTokenizationResult error_ty
bool ParseAndValidate(const char* token, int token_len, int min, int max,
int* result) WARN_UNUSED_RESULT;
+// Given the month calculates the quarter of year.
+int GetQuarter(int month);
+
bool ParseFractionToken(const char* token, int token_len,
DateTimeParseResult* result) WARN_UNUSED_RESULT;
+/// Gets a month name token (either full or short name) and converts it to the ordinal
+/// number of month between 1 and 12. Make sure 'tok.type' is either MONTH_NAME or
+/// MONTH_NAME_SHORT. Result is stored in 'month'. Returns false if the given month name
+/// is invalid. 'fx_modifier' indicates if there is an active FX modifier on the whole
+/// format.
+/// If the month part of the input is not followed by a separator then the end of the
+/// month part is found using MONTH_PREFIX_TO_SUFFIX. First, the 3 letter prefix of the
+/// month name identifies a particular month and then checks if the rest of the month
+/// name matches. If it does then '*token_end' is adjusted to point to the character
+/// right after the end of the month part.
+bool ParseMonthNameToken(const DateTimeFormatToken& tok, const char* token_start,
+ const char** token_end, bool fx_modifier, int* month)
+ WARN_UNUSED_RESULT;
+
inline bool IsLeapYear(int year) {
return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
}
-/// Given the year, month and the day in month calculates the day in year using
-/// MONTH_LENGTHS.
+/// Given the year, month and the day in month calculates the day in year.
int GetDayInYear(int year, int month, int day_in_month);
/// Gets a year and the number of days passed since 1st of January that year. Calculates
@@ -246,5 +308,29 @@ bool GetMonthAndDayFromDaysSinceJan1(int year, int days_since_jan1, int* month,
// a string to datetime conversion path.
std::string FormatTextToken(const DateTimeFormatToken& tok);
+/// Taking 'num_of_month' this function provides the name of the month. Based on the
+/// casing of the month format token in 'tok' this can format the results in 3 cases:
+/// Capitalized, full lowercase and full uppercase. E.g. "March", "march" and "MARCH".
+const std::string& FormatMonthName(int num_of_month, const DateTimeFormatToken& tok);
+
+/// Gets 'day' as a number between 1 and 7 that represents the day of week where Sunday
+/// is 1 and returns the name of the day. Based on the casing of the day format token in
+/// 'tok' this can format the results in 3 cases: Capitalized, full lowercase and full
+/// uppercase. E.g. "Monday", "monday" and "MONDAY".
+const std::string& FormatDayName(int day, const DateTimeFormatToken& tok);
+
+/// Returns how the output of a month or day token should be formatted. Make sure to
+/// call this when 'tok.type' is any of the month or day name tokens.
+TimestampFunctions::TextCase GetOutputCase(const DateTimeFormatToken& tok);
+
+/// Given the year, month and the day in month calculates the week in year where the
+/// first week of the year starts from 1st January.
+int GetWeekOfYear(int year, int month, int day);
+
+/// Given the day of month calculates the week in the month where the first week of the
+/// month starts from the first day of the month.
+int GetWeekOfMonth(int day);
+
}
+
}
diff --git a/be/src/runtime/datetime-simple-date-format-parser.cc b/be/src/runtime/datetime-simple-date-format-parser.cc
index ceeb654..2505fcf 100644
--- a/be/src/runtime/datetime-simple-date-format-parser.cc
+++ b/be/src/runtime/datetime-simple-date-format-parser.cc
@@ -33,7 +33,6 @@ namespace impala {
namespace datetime_parse_util {
bool SimpleDateFormatTokenizer::initialized = false;
-bool SimpleDateFormatParser::initialized = false;
const int SimpleDateFormatTokenizer::DEFAULT_DATE_FMT_LEN = 10;
const int SimpleDateFormatTokenizer::DEFAULT_TIME_FMT_LEN = 8;
@@ -49,8 +48,6 @@ DateTimeFormatContext SimpleDateFormatTokenizer::DEFAULT_DATE_TIME_CTX[10];
DateTimeFormatContext SimpleDateFormatTokenizer::DEFAULT_ISO_DATE_TIME_CTX[10];
DateTimeFormatContext SimpleDateFormatTokenizer::DEFAULT_TIME_FRAC_CTX[10];
-unordered_map<StringValue, int> SimpleDateFormatParser::REV_MONTH_INDEX;
-
void SimpleDateFormatTokenizer::InitCtx() {
if (initialized) return;
@@ -97,23 +94,6 @@ void SimpleDateFormatTokenizer::InitCtx() {
initialized = true;
}
-void SimpleDateFormatParser::InitCtx() {
- if (initialized) return;
- // This needs to be lazily init'd because a StringValues hash function will be invoked
- // for each entry that's placed in the map. The hash function expects that
- // CpuInfo::Init() has already been called.
- REV_MONTH_INDEX = boost::unordered_map<StringValue, int>({
- {StringValue("jan"), 1}, {StringValue("feb"), 2},
- {StringValue("mar"), 3}, {StringValue("apr"), 4},
- {StringValue("may"), 5}, {StringValue("jun"), 6},
- {StringValue("jul"), 7}, {StringValue("aug"), 8},
- {StringValue("sep"), 9}, {StringValue("oct"), 10},
- {StringValue("nov"), 11}, {StringValue("dec"), 12}
- });
-
- initialized = true;
-}
-
bool SimpleDateFormatTokenizer::IsValidTZOffset(const char* str_begin,
const char* str_end) {
if (*str_begin == '+' || *str_begin == '-') {
@@ -201,7 +181,7 @@ bool SimpleDateFormatTokenizer::Tokenize(DateTimeFormatContext* dt_ctx,
}
if (tok_type == MONTH_IN_YEAR) {
if (UNLIKELY(tok_len > 3)) return false;
- if (tok_len == 3) tok_type = MONTH_IN_YEAR_SLT;
+ if (tok_len == 3) tok_type = MONTH_NAME_SHORT;
}
// In an output scenario, fmt_out_len is used to determine the print buffer size.
// If the format uses short tokens e.g. yyyy-MM-d, there must to be enough room in
@@ -450,7 +430,6 @@ const DateTimeFormatContext* SimpleDateFormatTokenizer::GetDefaultFormatContext(
bool SimpleDateFormatParser::ParseDateTime(const char* str, int str_len,
const DateTimeFormatContext& dt_ctx, DateTimeParseResult* dt_result) {
- DCHECK(initialized);
DCHECK(dt_ctx.fmt_len > 0);
DCHECK(dt_ctx.toks.size() > 0);
DCHECK(dt_result != NULL);
@@ -486,14 +465,12 @@ bool SimpleDateFormatParser::ParseDateTime(const char* str, int str_len,
if (!ParseAndValidate(tok_val, tok_len, 1, 12, &dt_result->month)) return false;
break;
}
- case MONTH_IN_YEAR_SLT: {
- char raw_buff[tok.len];
- std::transform(tok_val, tok_val + tok.len, raw_buff, ::tolower);
- StringValue buff(raw_buff, tok.len);
- boost::unordered_map<StringValue, int>::const_iterator iter =
- REV_MONTH_INDEX.find(buff);
- if (UNLIKELY(iter == REV_MONTH_INDEX.end())) return false;
- dt_result->month = iter->second;
+ case MONTH_NAME_SHORT: {
+ const char* tok_end = tok_val + tok_len;
+ if (!ParseMonthNameToken(tok, tok_val, &tok_end, dt_ctx.fx_modifier,
+ &dt_result->month)) {
+ return false;
+ }
break;
}
case DAY_IN_MONTH: {
@@ -560,4 +537,4 @@ bool SimpleDateFormatParser::ParseDateTime(const char* str, int str_len,
} // namespace datetime_parse_util
-} // nmespace impala
+} // namespace impala
diff --git a/be/src/runtime/datetime-simple-date-format-parser.h b/be/src/runtime/datetime-simple-date-format-parser.h
index 740b27c..6557b6e 100644
--- a/be/src/runtime/datetime-simple-date-format-parser.h
+++ b/be/src/runtime/datetime-simple-date-format-parser.h
@@ -165,15 +165,6 @@ public:
/// Return true if the date/time was successfully parsed.
static bool ParseDateTime(const char* str, int len,
const DateTimeFormatContext& dt_ctx, DateTimeParseResult* dt_result);
-
- // Initializes REV_MONTH_INDEX;
- static void InitCtx();
-private:
- /// Used to indicate if the state has been initialized.
- static bool initialized;
-
- /// Lazily initialized pseudo-constant hashmap for mapping month names to an index.
- static boost::unordered_map<StringValue, int> REV_MONTH_INDEX;
};
}
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index b8d80e1..93a6039 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -234,9 +234,27 @@ string TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date&
}
break;
}
+ case QUARTER_OF_YEAR: {
+ num_val = GetQuarter(d.month());
+ break;
+ }
case MONTH_IN_YEAR: num_val = d.month().as_number(); break;
- case MONTH_IN_YEAR_SLT: {
- result.append(d.month().as_short_string(), 3);
+ case MONTH_NAME:
+ case MONTH_NAME_SHORT: {
+ result.append(FormatMonthName(d.month().as_number(), tok));
+ break;
+ }
+ case WEEK_OF_YEAR: {
+ num_val = GetWeekOfYear(d.year(), d.month(), d.day());
+ break;
+ }
+ case WEEK_OF_MONTH: {
+ num_val = GetWeekOfMonth(d.day());
+ break;
+ }
+ case DAY_OF_WEEK: {
+ // Value in [1-7] where 1 represents Sunday, 2 represents Monday, etc.
+ num_val = d.day_of_week() + 1;
break;
}
case DAY_IN_MONTH: num_val = d.day(); break;
@@ -244,6 +262,11 @@ string TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date&
num_val = GetDayInYear(d.year(), d.month(), d.day());
break;
}
+ case DAY_NAME:
+ case DAY_NAME_SHORT: {
+ result.append(FormatDayName(d.day_of_week() + 1, tok));
+ break;
+ }
case HOUR_IN_DAY: num_val = t.hours(); break;
case HOUR_IN_HALF_DAY: {
num_val = t.hours();
diff --git a/tests/query_test/test_cast_with_format.py b/tests/query_test/test_cast_with_format.py
index 88ab090..bd1b069 100644
--- a/tests/query_test/test_cast_with_format.py
+++ b/tests/query_test/test_cast_with_format.py
@@ -329,6 +329,424 @@ class TestCastWithFormat(ImpalaTestSuite):
"format 'RR')", query_options)
assert result.data == ["19"]
+ def test_month_name(self):
+ # Test different lowercase vs uppercase scenarios with the string to datetime path.
+ result = self.execute_query("select cast('2010-February-11' as timestamp FORMAT "
+ "'YYYY-MONTH-DD')")
+ assert result.data == ["2010-02-11 00:00:00"]
+
+ result = self.execute_query("select cast('2010-march-12' as timestamp FORMAT "
+ "'YYYY-MONTH-DD')")
+ assert result.data == ["2010-03-12 00:00:00"]
+
+ result = self.execute_query("select cast('APRIL 13 2010' as date FORMAT "
+ "'MONTH DD YYYY')")
+ assert result.data == ["2010-04-13"]
+
+ result = self.execute_query("select cast('2010 14 MAY' as timestamp FORMAT "
+ "'YYYY DD MONTH')")
+ assert result.data == ["2010-05-14 00:00:00"]
+
+ result = self.execute_query("select cast('2010 14 June' as timestamp FORMAT "
+ "'YYYY DD MONTH')")
+ assert result.data == ["2010-06-14 00:00:00"]
+
+ result = self.execute_query("select cast('2010 14 july' as timestamp FORMAT "
+ "'YYYY DD MONTH')")
+ assert result.data == ["2010-07-14 00:00:00"]
+
+ result = self.execute_query("select cast('2010 14 AUGUST' as timestamp FORMAT "
+ "'YYYY DD MONTH')")
+ assert result.data == ["2010-08-14 00:00:00"]
+
+ result = self.execute_query("select cast('2010 14 September' as date FORMAT "
+ "'YYYY DD month')")
+ assert result.data == ["2010-09-14"]
+
+ result = self.execute_query("select cast('2010 14 october' as date FORMAT "
+ "'YYYY DD month')")
+ assert result.data == ["2010-10-14"]
+
+ result = self.execute_query("select cast('2010 14 NOVEMBER' as date FORMAT "
+ "'YYYY DD month')")
+ assert result.data == ["2010-11-14"]
+
+ result = self.execute_query("select cast('2010 14 December' as date FORMAT "
+ "'YYYY DD month')")
+ assert result.data == ["2010-12-14"]
+
+ result = self.execute_query("select cast('2010 14 january' as date FORMAT "
+ "'YYYY DD month')")
+ assert result.data == ["2010-01-14"]
+
+ # Test different lowercase vs uppercase scenarios with the datetime to string path.
+ result = self.execute_query("select cast(date'2010-10-18' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["OCTOBER October october "]
+
+ result = self.execute_query("select cast(cast('2010-11-18' as timestamp) as string "
+ "FORMAT 'MONTH Month month')")
+ assert result.data == ["NOVEMBER November november "]
+
+ result = self.execute_query("select cast(date'2010-12-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["DECEMBER December december "]
+
+ result = self.execute_query("select cast(date'2010-01-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["JANUARY January january "]
+
+ result = self.execute_query("select cast(date'2010-02-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["FEBRUARY February february "]
+
+ result = self.execute_query("select cast(date'2010-03-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["MARCH March march "]
+
+ result = self.execute_query("select cast(date'2010-04-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["APRIL April april "]
+
+ result = self.execute_query("select cast(date'2010-05-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["MAY May may "]
+
+ result = self.execute_query("select cast(date'2010-06-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["JUNE June june "]
+
+ result = self.execute_query("select cast(date'2010-07-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["JULY July july "]
+
+ result = self.execute_query("select cast(date'2010-08-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["AUGUST August august "]
+
+ result = self.execute_query("select cast(date'2010-09-19' as string FORMAT "
+ "'MONTH Month month')")
+ assert result.data == ["SEPTEMBER September september"]
+
+ # Test odd casing of month token.
+ result = self.execute_query("select cast(date'2010-09-20' as string FORMAT "
+ "'MOnth MONth MONTh')")
+ assert result.data == ["SEPTEMBER SEPTEMBER SEPTEMBER"]
+
+ result = self.execute_query("select cast(date'2010-09-21' as string FORMAT "
+ "'montH monTH moNTH moNTH')")
+ assert result.data == ["september september september september"]
+
+ # Test different lowercase vs uppercase scenarios with the datetime to string path
+ # when FM is provided.
+ result = self.execute_query("select cast(date'2010-10-18' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["OCTOBER October october"]
+
+ result = self.execute_query("select cast(cast('2010-11-18' as timestamp) as string "
+ "FORMAT 'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["NOVEMBER November november"]
+
+ result = self.execute_query("select cast(date'2010-12-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["DECEMBER December december"]
+
+ result = self.execute_query("select cast(date'2010-01-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["JANUARY January january"]
+
+ result = self.execute_query("select cast(date'2010-02-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["FEBRUARY February february"]
+
+ result = self.execute_query("select cast(date'2010-03-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["MARCH March march"]
+
+ result = self.execute_query("select cast(date'2010-04-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["APRIL April april"]
+
+ result = self.execute_query("select cast(date'2010-05-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["MAY May may"]
+
+ result = self.execute_query("select cast(date'2010-06-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["JUNE June june"]
+
+ result = self.execute_query("select cast(date'2010-07-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["JULY July july"]
+
+ result = self.execute_query("select cast(date'2010-08-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["AUGUST August august"]
+
+ result = self.execute_query("select cast(date'2010-09-19' as string FORMAT "
+ "'FMMONTH FMMonth FMmonth')")
+ assert result.data == ["SEPTEMBER September september"]
+
+ # Incorrect month name.
+ result = self.execute_query("select cast('2010 15 JU' as timestamp FORMAT "
+ "'YYYY DD MONTH')")
+ assert result.data == ["NULL"]
+
+ # MONTH token without surrounding separators.
+ result = self.execute_query("select cast('2010SEPTEMBER17' as date FORMAT "
+ "'YYYYMONTHDD')")
+ assert result.data == ["2010-09-17"]
+
+ result = self.execute_query("select cast('2010OCTOBER17' as timestamp FORMAT "
+ "'YYYYMONTHDD')")
+ assert result.data == ["2010-10-17 00:00:00"]
+
+ # Applying FX and FM modifiers on Month token.
+ result = self.execute_query("select cast(cast('2010-07-20' as timestamp) as string "
+ "FORMAT 'YYYYmonthDD')")
+ assert result.data == ["2010july 20"]
+
+ result = self.execute_query("select cast(date'2010-09-20' as string "
+ "FORMAT 'YYYYmonthDD')")
+ assert result.data == ["2010september20"]
+
+ result = self.execute_query("select cast(cast('2010-08-20' as timestamp) as string "
+ "FORMAT 'YYYYFMMonthDD')")
+ assert result.data == ["2010August20"]
+
+ result = self.execute_query("select cast(cast('2010-10-20' as timestamp) as string "
+ "FORMAT 'FXYYYYFMMONTHDD')")
+ assert result.data == ["2010OCTOBER20"]
+
+ result = self.execute_query("select cast('2010-February-19' as timestamp FORMAT "
+ "'FXYYYY-MONTH-DD')")
+ assert result.data == ["NULL"]
+
+ result = self.execute_query("select cast('2010-February -21' as timestamp FORMAT "
+ "'FXYYYY-MONTH-DD')")
+ assert result.data == ["2010-02-21 00:00:00"]
+
+ result = self.execute_query("select cast('2010-February 22' as date FORMAT "
+ "'FXYYYY-MONTHDD')")
+ assert result.data == ["2010-02-22"]
+
+ result = self.execute_query("select cast('2010-February-20' as timestamp FORMAT "
+ "'FXYYYY-FMMONTH-DD')")
+ assert result.data == ["2010-02-20 00:00:00"]
+
+ def test_short_month_name(self):
+ # Test different lowercase vs uppercase scenarios with the string to datetime path.
+ result = self.execute_query("select cast('2015-Feb-11' as timestamp FORMAT "
+ "'YYYY-MON-DD')")
+ assert result.data == ["2015-02-11 00:00:00"]
+
+ result = self.execute_query("select cast('2015-mar-12' as timestamp FORMAT "
+ "'YYYY-MON-DD')")
+ assert result.data == ["2015-03-12 00:00:00"]
+
+ result = self.execute_query("select cast('APR 13 2015' as timestamp FORMAT "
+ "'MON DD YYYY')")
+ assert result.data == ["2015-04-13 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 MAY' as timestamp FORMAT "
+ "'YYYY DD MON')")
+ assert result.data == ["2015-05-14 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 jun' as timestamp FORMAT "
+ "'YYYY DD MON')")
+ assert result.data == ["2015-06-14 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 Jul' as timestamp FORMAT "
+ "'YYYY DD MON')")
+ assert result.data == ["2015-07-14 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 AUG' as timestamp FORMAT "
+ "'YYYY DD MON')")
+ assert result.data == ["2015-08-14 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 Sep' as timestamp FORMAT "
+ "'YYYY DD mon')")
+ assert result.data == ["2015-09-14 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 oct' as timestamp FORMAT "
+ "'YYYY DD mon')")
+ assert result.data == ["2015-10-14 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 nov' as timestamp FORMAT "
+ "'YYYY DD mon')")
+ assert result.data == ["2015-11-14 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 DEC' as timestamp FORMAT "
+ "'YYYY DD mon')")
+ assert result.data == ["2015-12-14 00:00:00"]
+
+ result = self.execute_query("select cast('2015 14 Jan' as timestamp FORMAT "
+ "'YYYY DD mon')")
+ assert result.data == ["2015-01-14 00:00:00"]
+
+ # Test different lowercase vs uppercase scenarios with the datetime to string path.
+ result = self.execute_query("select cast(date'2015-10-18' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["OCT Oct oct"]
+
+ result = self.execute_query("select cast(cast('2015-11-18' as timestamp) as string "
+ "FORMAT 'MON Mon mon')")
+ assert result.data == ["NOV Nov nov"]
+
+ result = self.execute_query("select cast(date'2015-12-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["DEC Dec dec"]
+
+ result = self.execute_query("select cast(date'2015-01-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["JAN Jan jan"]
+
+ result = self.execute_query("select cast(date'2015-02-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["FEB Feb feb"]
+
+ result = self.execute_query("select cast(date'2015-03-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["MAR Mar mar"]
+
+ result = self.execute_query("select cast(date'2015-04-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["APR Apr apr"]
+
+ result = self.execute_query("select cast(date'2015-05-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["MAY May may"]
+
+ result = self.execute_query("select cast(date'2015-06-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["JUN Jun jun"]
+
+ result = self.execute_query("select cast(date'2015-07-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["JUL Jul jul"]
+
+ result = self.execute_query("select cast(date'2015-08-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["AUG Aug aug"]
+
+ result = self.execute_query("select cast(date'2015-09-19' as string FORMAT "
+ "'MON Mon mon')")
+ assert result.data == ["SEP Sep sep"]
+
+ # Test odd casing of short month token.
+ result = self.execute_query("select cast(date'2010-09-22' as string FORMAT "
+ "'MOn mON moN')")
+ assert result.data == ["SEP sep sep"]
+
+ # Incorrect month name.
+ result = self.execute_query("select cast('2015 15 JU' as timestamp FORMAT "
+ "'YYYY DD MON')")
+ assert result.data == ["NULL"]
+
+ # MON token without separators in the format.
+ result = self.execute_query("select cast('2015AUG17' as date FORMAT "
+ "'YYYYMONDD')")
+ assert result.data == ["2015-08-17"]
+
+ result = self.execute_query("select cast(cast('2015-07-20' as timestamp) as string "
+ "FORMAT 'YYYYmonDD')")
+ assert result.data == ["2015jul20"]
+
+ # FX/FM has no effect on MON.
+ result = self.execute_query("select cast(cast('2015-08-21' as timestamp) as string "
+ "FORMAT 'FXYYYYmonDD')")
+ assert result.data == ["2015aug21"]
+
+ result = self.execute_query("select cast(date'2015-09-22' as string "
+ "FORMAT 'FXYYYYFMMonDD')")
+ assert result.data == ["2015Sep22"]
+
+ def test_week_of_year(self):
+ result = self.execute_query("select cast(cast('2019-01-01' as timestamp) as string "
+ "FORMAT 'WW')")
+ assert result.data == ["01"]
+
+ result = self.execute_query("select cast(date'2019-01-07' as string "
+ "FORMAT 'WW')")
+ assert result.data == ["01"]
+
+ result = self.execute_query("select cast(cast('2019-01-08' as timestamp) as string "
+ "FORMAT 'WW')")
+ assert result.data == ["02"]
+
+ result = self.execute_query("select cast(date'2019-02-01' as string "
+ "FORMAT 'WW')")
+ assert result.data == ["05"]
+
+ result = self.execute_query("select cast(cast('2019-02-05' as timestamp) as string "
+ "FORMAT 'WW')")
+ assert result.data == ["06"]
+
+ result = self.execute_query("select cast(date'2019-12-01' as string "
+ "FORMAT 'WW')")
+ assert result.data == ["48"]
+
+ result = self.execute_query("select cast(cast('2019-12-02' as timestamp) as string "
+ "FORMAT 'WW')")
+ assert result.data == ["48"]
+
+ result = self.execute_query("select cast(date'2019-12-03' as string "
+ "FORMAT 'WW')")
+ assert result.data == ["49"]
+
+ result = self.execute_query("select cast(cast('2019-12-30' as timestamp) as string "
+ "FORMAT 'WW')")
+ assert result.data == ["52"]
+
+ result = self.execute_query("select cast(date'2019-12-31' as string "
+ "FORMAT 'WW')")
+ assert result.data == ["53"]
+
+ result = self.execute_query("select cast(cast('2020-01-01' as timestamp) as string "
+ "FORMAT 'WW')")
+ assert result.data == ["01"]
+
+ def test_week_of_month(self):
+ result = self.execute_query("select cast(cast('2019-01-01' as timestamp) as string "
+ "FORMAT 'W')")
+ assert result.data == ["1"]
+
+ result = self.execute_query("select cast(date'2019-01-07' as string "
+ "FORMAT 'W')")
+ assert result.data == ["1"]
+
+ result = self.execute_query("select cast(cast('2019-01-08' as timestamp) as string "
+ "FORMAT 'W')")
+ assert result.data == ["2"]
+
+ result = self.execute_query("select cast(date'2019-01-14' as string "
+ "FORMAT 'W')")
+ assert result.data == ["2"]
+
+ result = self.execute_query("select cast(cast('2019-01-15' as timestamp) as string "
+ "FORMAT 'W')")
+ assert result.data == ["3"]
+
+ result = self.execute_query("select cast(date'2019-01-21' as string "
+ "FORMAT 'W')")
+ assert result.data == ["3"]
+
+ result = self.execute_query("select cast(cast('2019-01-22' as timestamp) as string "
+ "FORMAT 'W')")
+ assert result.data == ["4"]
+
+ result = self.execute_query("select cast(date'2019-01-28' as string "
+ "FORMAT 'W')")
+ assert result.data == ["4"]
+
+ result = self.execute_query("select cast(cast('2019-01-29' as timestamp) as string "
+ "FORMAT 'W')")
+ assert result.data == ["5"]
+
+ result = self.execute_query("select cast(date'2019-02-01' as string "
+ "FORMAT 'W')")
+ assert result.data == ["1"]
+
def test_day_in_year(self):
# Test "day in year" token in a non leap year scenario
result = self.execute_query("select cast('2019 1' as timestamp FORMAT 'YYYY DDD')")
@@ -384,6 +802,104 @@ class TestCastWithFormat(ImpalaTestSuite):
"format 'YYYY DDD') as string format'DDD')")
assert result.data == ["123"]
+ def test_day_name(self):
+ # Different lowercase and uppercase scenarios.
+ result = self.execute_query("select cast(date'2019-11-13' as string "
+ "format 'DAY Day day DY Dy dy')")
+ assert result.data == ["WEDNESDAY Wednesday wednesday WED Wed wed"]
+
+ result = self.execute_query("select cast(cast('2019-11-14' as timestamp) as string "
+ "format 'DAY Day day DY Dy dy')")
+ assert result.data == ["THURSDAY Thursday thursday THU Thu thu"]
+
+ result = self.execute_query("select cast(date'2019-11-15' as string "
+ "format 'DAY Day day DY Dy dy')")
+ assert result.data == ["FRIDAY Friday friday FRI Fri fri"]
+
+ result = self.execute_query("select cast(cast('2019-11-16' as timestamp) as string "
+ "format 'DAY Day day DY Dy dy')")
+ assert result.data == ["SATURDAY Saturday saturday SAT Sat sat"]
+
+ result = self.execute_query("select cast(date'2019-11-17' as string "
+ "format 'DAY Day day DY Dy dy')")
+ assert result.data == ["SUNDAY Sunday sunday SUN Sun sun"]
+
+ result = self.execute_query("select cast(cast('2019-11-18' as timestamp) as string "
+ "format 'DAY Day day DY Dy dy')")
+ assert result.data == ["MONDAY Monday monday MON Mon mon"]
+
+ result = self.execute_query("select cast(date'2019-11-19' as string "
+ "format 'DAY Day day DY Dy dy')")
+ assert result.data == ["TUESDAY Tuesday tuesday TUE Tue tue"]
+
+ # Different lowercase and uppercase scenarios when FM is provided.
+ result = self.execute_query("select cast(cast('2019-11-13' as timestamp) as string "
+ "format 'FMDAY FMDay FMday FMDY FMDy FMdy')")
+ assert result.data == ["WEDNESDAY Wednesday wednesday WED Wed wed"]
+
+ result = self.execute_query("select cast(date'2019-11-14' as string "
+ "format 'FMDAY FMDay FMday FMDY FMDy FMdy')")
+ assert result.data == ["THURSDAY Thursday thursday THU Thu thu"]
+
+ result = self.execute_query("select cast(cast('2019-11-15' as timestamp) as string "
+ "format 'FMDAY FMDay FMday FMDY FMDy FMdy')")
+ assert result.data == ["FRIDAY Friday friday FRI Fri fri"]
+
+ result = self.execute_query("select cast(date'2019-11-16' as string "
+ "format 'FMDAY FMDay FMday FMDY FMDy FMdy')")
+ assert result.data == ["SATURDAY Saturday saturday SAT Sat sat"]
+
+ result = self.execute_query("select cast(cast('2019-11-17' as timestamp) as string "
+ "format 'FMDAY FMDay FMday FMDY FMDy FMdy')")
+ assert result.data == ["SUNDAY Sunday sunday SUN Sun sun"]
+
+ result = self.execute_query("select cast(date'2019-11-18' as string "
+ "format 'FMDAY FMDay FMday FMDY FMDy FMdy')")
+ assert result.data == ["MONDAY Monday monday MON Mon mon"]
+
+ result = self.execute_query("select cast(cast('2019-11-19' as timestamp) as string "
+ "format 'FMDAY FMDay FMday FMDY FMDy FMdy')")
+ assert result.data == ["TUESDAY Tuesday tuesday TUE Tue tue"]
+
+ # Test odd casing of day token.
+ result = self.execute_query("select cast(date'2010-01-20' as string FORMAT "
+ "'DAy dAY daY dY')")
+ assert result.data == ["WEDNESDAY wednesday wednesday wed"]
+
+ # Day token without surrounding separators
+ result = self.execute_query("select cast(date'2019-11-11' as string "
+ "format 'YYYYDayMonth')")
+ assert result.data == ["2019Monday November "]
+
+ result = self.execute_query("select cast(cast('2019-11-12' as timestamp) as string "
+ "format 'YYYYDYDD')")
+ assert result.data == ["2019TUE12"]
+
+ result = self.execute_query("select cast(date'2019-11-11' as string "
+ "format 'YYYYDayMonth')")
+ assert result.data == ["2019Monday November "]
+
+ result = self.execute_query("select cast(cast('2019-11-12' as timestamp) as string "
+ "format 'YYYYDYDD')")
+ assert result.data == ["2019TUE12"]
+
+ # Day token with FM and FX modifiers.
+ result = self.execute_query("select cast(cast('2019-01-01' as timestamp) as string "
+ "format 'FXYYYY DAY DD')")
+ assert result.data == ["2019 TUESDAY 01"]
+
+ result = self.execute_query("select cast(date'2019-01-01' as string "
+ "format 'FXYYYY FMDAY DD')")
+ assert result.data == ["2019 TUESDAY 01"]
+
+ result = self.execute_query("select cast(cast('2019-02-02' as timestamp) as string "
+ "format 'FXYYYY DY DD')")
+ assert result.data == ["2019 SAT 02"]
+
+ result = self.execute_query("select cast(date'2019-02-02' as string "
+ "format 'FXYYYY FMDY DD')")
+ assert result.data == ["2019 SAT 02"]
+
def test_second_of_day(self):
# Check boundaries
result = self.client.execute("select cast('2019-11-10 86399.11' as "
@@ -417,6 +933,43 @@ class TestCastWithFormat(ImpalaTestSuite):
"as string format 'SSSSS')")
assert result.data == ["86399"]
+ def test_day_of_week(self):
+ # Sunday is 1
+ result = self.execute_query("select cast(cast('2019-11-03' as timestamp) as string "
+ "FORMAT 'D')")
+ assert result.data == ["1"]
+
+ result = self.execute_query("select cast(cast('2019-11-03' as date) as string "
+ "FORMAT 'D')")
+ assert result.data == ["1"]
+
+ # Wednesday is 4
+ result = self.execute_query("select cast(cast('2019-11-06' as timestamp) as string "
+ "FORMAT 'D')")
+ assert result.data == ["4"]
+
+ result = self.execute_query("select cast(cast('2019-11-06' as date) as string "
+ "FORMAT 'D')")
+ assert result.data == ["4"]
+
+ # Saturday is 7
+ result = self.execute_query("select cast(cast('2019-11-09' as timestamp) as string "
+ "FORMAT 'D')")
+ assert result.data == ["7"]
+
+ result = self.execute_query("select cast(cast('2019-11-09' as date) as string "
+ "FORMAT 'D')")
+ assert result.data == ["7"]
+
+ # FX and FM modifier does not pad day of week values with zeros.
+ result = self.execute_query("select cast(cast('2019-12-01' as date) as string "
+ "FORMAT 'FXD')")
+ assert result.data == ["1"]
+
+ result = self.execute_query("select cast(cast('2019-12-02' as date) as string "
+ "FORMAT 'FXFMD')")
+ assert result.data == ["2"]
+
def test_fraction_seconds(self):
result = self.execute_query("select cast('2019-11-08 123456789' as "
"timestamp FORMAT 'YYYY-MM-DD FF9')")
@@ -977,6 +1530,39 @@ class TestCastWithFormat(ImpalaTestSuite):
"'fxYYYY-fmMM-DD')")
assert result.data == ["2019-05-10"]
+ def test_quarter(self):
+ result = self.client.execute("select cast(date'2001-01-01' as string "
+ "FORMAT 'YYYY Q MM')")
+ assert result.data == ["2001 1 01"]
+
+ result = self.client.execute("select cast(date'2001-03-31' as string "
+ "FORMAT 'YYYY Q MM')")
+ assert result.data == ["2001 1 03"]
+
+ result = self.client.execute("select cast(date'2001-4-1' as string "
+ "FORMAT 'YYYY Q MM')")
+ assert result.data == ["2001 2 04"]
+
+ result = self.client.execute("select cast(date'2001-6-30' as string "
+ "FORMAT 'YYYY Q MM')")
+ assert result.data == ["2001 2 06"]
+
+ result = self.client.execute("select cast(date'2001-7-1' as string "
+ "FORMAT 'YYYY Q MM')")
+ assert result.data == ["2001 3 07"]
+
+ result = self.client.execute("select cast(date'2001-9-30' as string "
+ "FORMAT 'YYYY Q MM')")
+ assert result.data == ["2001 3 09"]
+
+ result = self.client.execute("select cast(date'2001-10-1' as string "
+ "FORMAT 'YYYY Q MM')")
+ assert result.data == ["2001 4 10"]
+
+ result = self.client.execute("select cast(date'2001-12-31' as string "
+ "FORMAT 'YYYY Q MM')")
+ assert result.data == ["2001 4 12"]
+
def test_format_parse_errors(self):
# Invalid format
err = self.execute_query_expect_failure(self.client,
@@ -997,14 +1583,14 @@ class TestCastWithFormat(ImpalaTestSuite):
"select cast('2017-05-01' as timestamp format 'YYYY-MM-DD-YYYY')")
assert "Invalid duplication of format element" in str(err)
- # Multiple year token provided
+ # Multiple year tokens provided
err = self.execute_query_expect_failure(self.client,
"select cast('2017-05-01' as timestamp format 'YYYY-MM-DD-YY')")
- assert "Multiple year token provided" in str(err)
+ assert "Multiple year tokens provided" in str(err)
err = self.execute_query_expect_failure(self.client,
"select cast('2017-05-01' as timestamp format 'YYY-MM-DD-Y')")
- assert "Multiple year token provided" in str(err)
+ assert "Multiple year tokens provided" in str(err)
# Year and round year conflict
err = self.execute_query_expect_failure(self.client,
@@ -1015,6 +1601,36 @@ class TestCastWithFormat(ImpalaTestSuite):
"select cast('2017-05-01' as timestamp format 'RR-MM-DD-YYY')")
assert "Both year and round year are provided" in str(err)
+ # Quarter token not allowed in a string to datetime conversion.
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-1-01' as timestamp format 'YYYY-Q-DDD')")
+ assert "Quarter token is not allowed in a string to datetime conversion" in str(err)
+
+ # Conflict between MM, MONTH and MON tokens
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-05-01' as timestamp format 'YYYY-MM-DD-MONTH')")
+ assert "Multiple month tokens provided" in str(err)
+
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-05-01' as timestamp format 'YYYY-MM-DD-MON')")
+ assert "Multiple month tokens provided" in str(err)
+
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-05-01' as timestamp format 'YYYY-MONTH-DD-MON')")
+ assert "Multiple month tokens provided" in str(err)
+
+ # Week of year token not allowed in a string to datetime conversion.
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-1-01' as timestamp format 'YYYY-WW-DD')")
+ assert "Week number token is not allowed in a string to datetime conversion" in \
+ str(err)
+
+ # Week of month token not allowed in a string to datetime conversion.
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-1-01' as timestamp format 'YYYY-W-DD')")
+ assert "Week number token is not allowed in a string to datetime conversion" in \
+ str(err)
+
# Day of year conflict
err = self.execute_query_expect_failure(self.client,
"select cast('2017-05-01' as timestamp format 'YYYY-MM-DDD')")
@@ -1024,6 +1640,26 @@ class TestCastWithFormat(ImpalaTestSuite):
"select cast('2017-05-01' as timestamp format 'YYYY-DD-DDD')")
assert "Day of year provided with day or month token" in str(err)
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-MAY-01' as timestamp format 'YYYY-MONTH-DDD')")
+ assert "Day of year provided with day or month token" in str(err)
+
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-JUN-01' as timestamp format 'YYYY-MON-DDD')")
+ assert "Day of year provided with day or month token" in str(err)
+
+ # Day of week token not allowed in a string to datetime conversion.
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-1-02' as timestamp format 'YYYY-D-MM')")
+ assert "Day of week token is not allowed in a string to datetime conversion" in \
+ str(err)
+
+ # Day name token not allowed in a string to datetime conversion.
+ err = self.execute_query_expect_failure(self.client,
+ "select cast('2017-1-02 Monday' as timestamp format 'YYYY-DD-MM DAY')")
+ assert "Day name token is not allowed in a string to datetime conversion" in \
+ str(err)
+
# Conflict between hour tokens
err = self.execute_query_expect_failure(self.client,
"select cast('2017-05-01' as timestamp format 'YYYY-MM-DD HH:HH24')")