You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/03/13 22:20:24 UTC

[3/3] impala git commit: IMPALA-5315: Cast to timestamp fails for YYYY-M-D format

IMPALA-5315: Cast to timestamp fails for YYYY-M-D format

This change allows casting of a string in 'lazy' date/time
format to timestamp. The supported lazy date formats are:
  yyyy-[M]M-[d]d
  yyyy-[M]M-[d]d [H]H:[m]m:[s]s[.SSSSSSSSS]
  [H]H:[m]m:[s]s[.SSSSSSSSS]

We will incur a SCAN performance penalty (approximately 1/2
TotalReadThroughput) when the string is in one of these
lazy date/time formats.

Testing:
Benchmarked the performance consequence by executing this SQL on
a private build over 3.8 billion rows:
select min(cast (time_string as timestamp)) from private.impala_5315

Added tests for valid and invalid date/time format strings
in expr-test.cc to be in line with existing tests for CAST() function.

Added end-to-end tests into exprs.test and
select-lazy-timestamp.test to exercise the new function within
the context of a query.

Added tests to exercise the leading and trailing white space trimming
behaviour in default and lazy date/time string format (IMPALA-6630).

Change-Id: Ib9a184a09d7e7783f04d47588537612c2ecec28f
Reviewed-on: http://gerrit.cloudera.org:8080/7009
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0d7787fe
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0d7787fe
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0d7787fe

Branch: refs/heads/master
Commit: 0d7787fe4df1ab2bb8231b0ee0912e3cf2787f9e
Parents: 6d8ce64
Author: Vincent Tran <vt...@cloudera.com>
Authored: Sat May 27 03:02:19 2017 -0400
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 13 22:10:18 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                       | 117 ++++++++++++-
 be/src/runtime/timestamp-parse-util.cc          | 168 ++++++++++++++++++-
 be/src/runtime/timestamp-parse-util.h           |  29 ++++
 testdata/data/lazy_timestamp.csv                |  13 ++
 .../queries/QueryTest/exprs.test                |  36 ++++
 .../QueryTest/select-lazy-timestamp.test        |  20 +++
 tests/query_test/test_scanners.py               |  22 +++
 7 files changed, 398 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index e02ff6a..bd25328 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3051,6 +3051,121 @@ TEST_F(ExprTest, CastExprs) {
   TestStringValue("cast(cast(cast('2012-01-01 09:10:11.123456789' as timestamp) as"
       " timestamp) as string)", "2012-01-01 09:10:11.123456789");
 
+  // Test casting of lazy date and/or time format string to timestamp
+  TestTimestampValue(
+      "cast('2001-1-2' as timestamp)", TimestampValue::Parse("2001-01-02 00:00:00"));
+  TestTimestampValue(
+      "cast('2001-01-3' as timestamp)", TimestampValue::Parse("2001-01-03 00:00:00"));
+  TestTimestampValue(
+      "cast('2001-1-21' as timestamp)", TimestampValue::Parse("2001-01-21 00:00:00"));
+  TestTimestampValue("cast('2001-1-21 12:5:30' as timestamp)",
+      TimestampValue::Parse("2001-01-21 12:05:30"));
+  TestTimestampValue("cast('2001-1-21 13:5:05' as timestamp)",
+      TimestampValue::Parse("2001-01-21 13:05:05"));
+  TestTimestampValue("cast('2001-1-21 1:2:3' as timestamp)",
+      TimestampValue::Parse("2001-01-21 01:02:03"));
+  TestTimestampValue("cast('2001-1-21 1:5:31.12345' as timestamp)",
+      TimestampValue::Parse("2001-01-21 01:05:31.123450000"));
+  TestTimestampValue("cast('2001-1-21 1:5:31.12345678910111213' as timestamp)",
+      TimestampValue::Parse("2001-01-21 01:05:31.123456789"));
+  TestTimestampValue(
+      "cast('1:05:1.12' as timestamp)", TimestampValue::Parse("01:05:01.120000000"));
+  TestTimestampValue("cast('1:05:1' as timestamp)", TimestampValue::Parse("01:05:01"));
+  TestTimestampValue("cast('        2001-01-9 1:05:1        ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestIsNull("cast('2001-1-21     11:2:3' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-6' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('01-1-21' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-21 12:5:3 AM' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1:05:31.123456foo' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('10/feb/10' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-foo1-2bar' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909/1-/2' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-1-2 12:32:1.111bar' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:1.111.111.2' as timestamp)", TYPE_TIMESTAMP);
+
+  // Test various ways of truncating a "lazy" format to produce an invalid timestamp.
+  TestIsNull("cast('1909-10-2 12:32:1.' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:11.' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:11. ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32: ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 1:32:' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 1:2:' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 1:2' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 1:2 ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12 ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 2' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10- ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909' as timestamp)", TYPE_TIMESTAMP);
+
+  // Test missing number from format.
+  TestIsNull("cast('1909-10-2 12:32:.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12::1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 :32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10- 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909--2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('-10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+
+  // Test duplicate separators - should return NULL because not a valid format.
+  TestIsNull("cast('1909--10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10--2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12::32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32::1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:1..9999' as timestamp)", TYPE_TIMESTAMP);
+
+  // Test numbers with too many digits in date/time - should return NULL because not a
+  // valid timestamp.
+  TestIsNull("cast('19097-10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-107-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-277 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 127:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:327:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:177.9999' as timestamp)", TYPE_TIMESTAMP);
+
+  // IMPALA-6630: Test whitespace trimming mechanism when cast from string to timestamp
+  TestTimestampValue("cast(' \t\r\n 2001-01-09 01:05:01.123456789 \t\r\n' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456789"));
+  TestTimestampValue("cast(' \t\r\n 2001-01-09T01:05:01.123456789 \t\r\n' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456789"));
+  TestTimestampValue("cast('  \t\r\n      2001-01-09 01:05:01   \t\r\n  ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('  \t\r\n      2001-01-09T01:05:01   \t\r\n  ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('  \t\r\n      2001-01-09   \t\r\n     ' as timestamp)",
+      TimestampValue::Parse("2001-01-09"));
+  TestTimestampValue("cast('  \t\r\n      01:05:01   \t\r\n     ' as timestamp)",
+      TimestampValue::Parse("01:05:01"));
+  TestTimestampValue("cast(' \t\r\n 01:05:01.123456789   \t\r\n     ' as timestamp)",
+      TimestampValue::Parse("01:05:01.123456789"));
+  TestTimestampValue("cast('  \t\r\n      2001-1-9 1:5:1    \t\r\n    ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('  \t\r\n  2001-1-9 1:5:1.12345678  \t\r\n ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456780"));
+  TestTimestampValue("cast('  \t\r\n      1:5:1    \t\r\n    ' as timestamp)",
+      TimestampValue::Parse("01:05:01"));
+  TestTimestampValue("cast('  \t\r\n      1:5:1.12345678    \t\r\n    ' as timestamp)",
+      TimestampValue::Parse("01:05:01.123456780"));
+  TestTimestampValue("cast('  \t\r\n      2001-1-9    \t\r\n    ' as timestamp)",
+      TimestampValue::Parse("2001-01-09"));
+
+  // Test invalid whitespace locations in strings to be casted to timestamp
+  TestIsNull(
+      "cast(' \t\r\n  2001-01-09      01:05:01  \t\r\n ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09   01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09\t01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09\r01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09\n01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-9    1:5:1' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-9\t1:5:1' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-9\r1:5:1' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-9\n1:5:1' as timestamp)", TYPE_TIMESTAMP);
+
   // IMPALA-3163: Test precise conversion from Decimal to Timestamp.
   TestTimestampValue("cast(cast(1457473016.1230 as decimal(17,4)) as timestamp)",
       TimestampValue::Parse("2016-03-08 21:36:56.123000000", 29));
@@ -5984,7 +6099,7 @@ TEST_F(ExprTest, TimestampFunctions) {
   TestIsNull("timestamp_cmp('','1966-05-04 15:33:45')", TYPE_INT);
   TestIsNull("timestamp_cmp(NULL,'1966-05-04 15:33:45')", TYPE_INT);
   // Invalid timestamp test case
-  TestIsNull("timestamp_cmp('1966-5-4 5:33:45','1966-5-4 15:33:45')", TYPE_INT);
+  TestIsNull("timestamp_cmp('1966-5-4 50:33:45','1966-5-4 15:33:45')", TYPE_INT);
 
   TestValue("int_months_between('1967-07-19','1966-06-04')", TYPE_INT, 13);
   TestValue("int_months_between('1966-06-04 16:34:45','1967-07-19 15:33:46')",

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/runtime/timestamp-parse-util.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index e64d904..c444214 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -17,6 +17,8 @@
 
 #include "runtime/timestamp-parse-util.h"
 
+#include <algorithm>
+
 #include <boost/assign/list_of.hpp>
 #include <boost/date_time/gregorian/gregorian.hpp>
 #include <boost/unordered_map.hpp>
@@ -214,8 +216,150 @@ bool TimestampParser::ParseFormatTokens(DateTimeFormatContext* dt_ctx) {
   return dt_ctx->has_date_toks || dt_ctx->has_time_toks;
 }
 
+const char* TimestampParser::ParseDigitToken(const char* str, const char* str_end) {
+  // Returns the end of the leading run of digits in [str, str_end), or 'str' if the
+  // run is empty. The cast is required: isdigit() is undefined for negative values,
+  // which is what a byte >= 0x80 becomes on platforms where plain char is signed.
+  const char* tok_end = str;
+  while (tok_end < str_end && isdigit(static_cast<unsigned char>(*tok_end))) ++tok_end;
+  return tok_end;
+}
+
+const char* TimestampParser::ParseSeparatorToken(
+    const char* str, const char* str_end, const char sep) {
+  // Skip the leading run of 'sep' characters in [str, str_end), stopping at the
+  // first character that differs or at str_end, whichever comes first. Returns
+  // 'str' unchanged when the first character is not 'sep'.
+  const char* cur = str;
+  while (cur < str_end && *cur == sep) ++cur;
+  return cur;
+}
+
+bool TimestampParser::ParseFormatTokensByStr(DateTimeFormatContext* dt_ctx) {
+  DCHECK(dt_ctx != NULL);
+  DCHECK(dt_ctx->fmt != NULL);
+  DCHECK_GT(dt_ctx->fmt_len, 0);
+  DCHECK_EQ(dt_ctx->toks.size(), 0);
+  const char* str_begin = dt_ctx->fmt;
+  const char* str_end = str_begin + dt_ctx->fmt_len;
+  const char* str = str_begin;
+  const char* tok_end;
+  // A leading run of exactly 4 digits is the year of a date. Any other length skips
+  // the date block, and that same run is validated below as the hour of a time.
+  tok_end = ParseDigitToken(str, str_end);
+  if (tok_end - str == 4) {
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(YEAR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Check for the date separator '-'
+    tok_end = ParseSeparatorToken(str, str_end, '-');
+    if (tok_end - str != 1) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Parse the 1 or 2 digit month.
+    tok_end = ParseDigitToken(str, str_end);
+    if (tok_end - str != 1 && tok_end - str != 2) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(MONTH_IN_YEAR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Check for the date separator '-'
+    tok_end = ParseSeparatorToken(str, str_end, '-');
+    if (tok_end - str != 1) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Parse the 1 or 2 digit day in month
+    tok_end = ParseDigitToken(str, str_end);
+    if (tok_end - str != 1 && tok_end - str != 2) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(DAY_IN_MONTH, str - str_begin, tok_end - str, str));
+    str = tok_end;
+    dt_ctx->has_date_toks = true;
+
+    // If the string ends here, we only have a date component
+    if (str == str_end) return true;
+
+    // Check for the space between date and time component
+    tok_end = ParseSeparatorToken(str, str_end, ' ');
+    if (tok_end - str != 1) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Invalid format if the date-time separator is the last character of the string
+    if (str >= str_end) return false;
+    tok_end = ParseDigitToken(str, str_end);
+  }
+
+  // Parse the 1 or 2 digit hour
+  if (tok_end - str != 1 && tok_end - str != 2) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(HOUR_IN_DAY, str - str_begin, tok_end - str, str));
+  str = tok_end;
+
+  // Check for the time component separator ':'
+  tok_end = ParseSeparatorToken(str, str_end, ':');
+  if (tok_end - str != 1) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+  str = tok_end;
+
+  // Parse the 1 or 2 digit minute
+  tok_end = ParseDigitToken(str, str_end);
+  if (tok_end - str != 1 && tok_end - str != 2) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(MINUTE_IN_HOUR, str - str_begin, tok_end - str, str));
+  str = tok_end;
+
+  // Check for the time component separator ':'
+  tok_end = ParseSeparatorToken(str, str_end, ':');
+  if (tok_end - str != 1) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+  str = tok_end;
+
+  // Parse the 1 or 2 digit second
+  tok_end = ParseDigitToken(str, str_end);
+  if (tok_end - str != 1 && tok_end - str != 2) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(SECOND_IN_MINUTE, str - str_begin, tok_end - str, str));
+  str = tok_end;
+  dt_ctx->has_time_toks = true;
+
+  // If there is more to parse, there may be a fractional component.
+  if (str < str_end) {
+    tok_end = ParseSeparatorToken(str, str_end, '.');
+    if (tok_end - str != 1) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Invalid format when the string ends right after '.' (no fractional digits)
+    if (str >= str_end) return false;
+
+    // Parse the fractional component.
+    // Like the non-lazy path, this will parse up to 9 fractional digits
+    tok_end = ParseDigitToken(str, str_end);
+    int num_digits = std::min<int>(9, tok_end - str);
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(FRACTION, str - str_begin, num_digits, str));
+    str = tok_end;
+
+    // Invalid format if there is more to parse after the fractional component
+    if (str < str_end) return false;
+  }
+  return true;
+}
+
 bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
     boost::posix_time::time_duration* t) {
+  int lazy_len;
+
   DCHECK(TimestampParser::initialized_);
   DCHECK(d != NULL);
   DCHECK(t != NULL);
@@ -248,6 +392,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
     }
   }
 
+  lazy_len = len;
   // Only process what we have to.
   if (len > DEFAULT_DATE_TIME_FMT_LEN) len = DEFAULT_DATE_TIME_FMT_LEN;
   // Determine the default formatting context that's required for parsing.
@@ -278,7 +423,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
           // There is likely a fractional component that's below the expected 9 chars.
           // We will need to work out which default context to use that corresponds to
           // the fractional length in the string.
-          if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN)) {
+          if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN) && LIKELY(str[19] == '.')) {
             switch (str[10]) {
               case ' ': {
                 dt_ctx =
@@ -295,7 +440,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
           break;
         }
       }
-    } else if (str[2] == ':') {
+    } else if (str[2] == ':' && str[5] == ':' && isdigit(str[7])) {
       if (len > DEFAULT_TIME_FRAC_FMT_LEN) len = DEFAULT_TIME_FRAC_FMT_LEN;
       if (len > DEFAULT_TIME_FMT_LEN && str[8] == '.') {
         dt_ctx = &DEFAULT_TIME_FRAC_CTX[len - DEFAULT_TIME_FMT_LEN - 1];
@@ -304,12 +449,23 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
       }
     }
   }
-  if (LIKELY(dt_ctx != NULL)) {
+
+  // Generating context lazily as a fall back if default formats fail.
+  // ParseFormatTokenByStr() does not require a template format string.
+  if (dt_ctx != nullptr) {
     return Parse(str, len, *dt_ctx, d, t);
   } else {
-    *d = boost::gregorian::date();
-    *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
-    return false;
+    DateTimeFormatContext lazy_ctx;
+    lazy_ctx.Reset(str, lazy_len);
+    if (ParseFormatTokensByStr(&lazy_ctx)) {
+      dt_ctx = &lazy_ctx;
+      len = lazy_len;
+      return Parse(str, len, *dt_ctx, d, t);
+    } else {
+      *d = boost::gregorian::date();
+      *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
+      return false;
+    }
   }
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/runtime/timestamp-parse-util.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h
index bbcc03f..bccf0b7 100644
--- a/be/src/runtime/timestamp-parse-util.h
+++ b/be/src/runtime/timestamp-parse-util.h
@@ -177,6 +177,35 @@ class TimestampParser {
   /// Return true if the parse was successful.
   static bool ParseFormatTokens(DateTimeFormatContext* dt_ctx);
 
+  /// Parse out the next digit token from the date/time string by checking for
+  /// contiguous digit characters and return a pointer to the end of that token.
+  /// str -- pointer to the string to be parsed
+  /// str_end -- pointer to the end (one past the last character) of the string
+  /// Returns the end of the digit token, or 'str' itself if no digit was found.
+  static const char* ParseDigitToken(const char* str, const char* str_end);
+
+  /// Parse out the next separator token from the date/time string by checking for
+  /// contiguous occurrences of the expected separator character.
+  /// str -- pointer to the string to be parsed
+  /// str_end -- pointer to the end (one past the last character) of the string
+  /// sep -- the separator char to compare the token to
+  /// Returns the end of the separator token, or 'str' itself if 'sep' was not found.
+  static const char* ParseSeparatorToken(
+      const char* str, const char* str_end, const char sep);
+
+  /// Parse the date/time string to generate the DateTimeFormatTokens required by
+  /// DateTimeFormatContext. Unlike ParseFormatTokens(), this function does not take a
+  /// template format string: dt_ctx->fmt holds the date/time value itself, and the
+  /// tokens are derived from the digit runs and separators found in that value.
+  /// A value may be a date (yyyy-[M]M-[d]d), a time ([H]H:[m]m:[s]s with an optional
+  /// '.' fraction, of which at most 9 digits are kept), or both joined by one space.
+  /// dt_ctx -- date/time format context; fmt must be non-NULL, fmt_len must be > 0 and
+  ///     toks must be empty. Tokens are appended to toks as they are recognized, and
+  ///     has_date_toks / has_time_toks are set once a full date / time has been read.
+  /// Returns true if the whole value matches one of the layouts above. When false is
+  /// returned, dt_ctx may be left holding the tokens recognized before the mismatch.
+  static bool ParseFormatTokensByStr(DateTimeFormatContext* dt_ctx);
+
   /// Parse a default date/time string. The default timestamp format is:
   /// yyyy-MM-dd HH:mm:ss.SSSSSSSSS or yyyy-MM-ddTHH:mm:ss.SSSSSSSSS. Either just the
   /// date or just the time may be specified. All components are required in either the

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/data/lazy_timestamp.csv
----------------------------------------------------------------------
diff --git a/testdata/data/lazy_timestamp.csv b/testdata/data/lazy_timestamp.csv
new file mode 100644
index 0000000..d44db1d
--- /dev/null
+++ b/testdata/data/lazy_timestamp.csv
@@ -0,0 +1,13 @@
+2001-1-2
+2001-1-02
+2001-01-2
+1:6:8
+01:6:8
+1:06:8
+1:6:08
+1:6:8.123456789101112
+1:6:8.123456789
+1:6:8.12345
+2001-1-2 1:6:8
+2001-1-2 1:6:8.123456
+2001-1-2 1:6:8.123456789101112

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index b3d0ca8..b6909c1 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -2959,3 +2959,39 @@ from functional.alltypes where id = 7
 ---- TYPES
 BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT
 ====
+---- QUERY
+# IMPALA-5315: Test support for non zero-padded date/time strings cast as timestamp
+select cast('2001-1-21 12:5:30' as timestamp)
+---- RESULTS
+2001-01-21 12:05:30
+---- TYPES
+timestamp
+====
+---- QUERY
+select cast('2001-1-2 1:5:3.123' as timestamp)
+---- RESULTS
+2001-01-02 01:05:03.123000000
+---- TYPES
+timestamp
+====
+---- QUERY
+select cast('1:5:3' as timestamp)
+---- RESULTS
+01:05:03
+---- TYPES
+timestamp
+====
+---- QUERY
+select cast('1:5:3.1234567' as timestamp)
+---- RESULTS
+01:05:03.123456700
+---- TYPES
+timestamp
+====
+---- QUERY
+select cast('2001-1-2' as timestamp)
+---- RESULTS
+2001-01-02 00:00:00
+---- TYPES
+timestamp
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test b/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test
new file mode 100644
index 0000000..8258072
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test
@@ -0,0 +1,20 @@
+====
+---- QUERY
+select ts from lazy_ts
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+2001-01-02 00:00:00
+2001-01-02 00:00:00
+2001-01-02 00:00:00
+01:06:08
+01:06:08
+01:06:08
+01:06:08
+01:06:08.123456789
+01:06:08.123456789
+01:06:08.123450000
+2001-01-02 01:06:08
+2001-01-02 01:06:08.123456000
+2001-01-02 01:06:08.123456789
+---- TYPES
+timestamp
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/tests/query_test/test_scanners.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index a67b793..a9ba5b8 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -855,3 +855,25 @@ class TestScanTruncatedFiles(ImpalaTestSuite):
     result = self.execute_query("select count(*) from %s" % fq_tbl_name)
     assert(len(result.data) == 1)
     assert(result.data[0] == str(num_rows))
+
+class TestUncompressedText(ImpalaTestSuite):
+  """Scanner tests that only run against uncompressed text tables."""
+  @classmethod
+  def get_workload(cls):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestUncompressedText, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_constraint(
+        lambda v: (v.get_value('table_format').file_format,
+                   v.get_value('table_format').compression_codec) == ('text', 'none'))
+
+  def test_scan_lazy_timestamp(self, vector, unique_database):
+    """IMPALA-5315: Test support for date/time in unpadded format."""
+    create_stmt = """CREATE TABLE {0}.lazy_ts (ts TIMESTAMP)""".format(unique_database)
+    self.client.execute(create_stmt)
+    tbl_loc = get_fs_path("/test-warehouse/%s.db/%s" % (unique_database, "lazy_ts"))
+    csv_path = os.environ['IMPALA_HOME'] + "/testdata/data/lazy_timestamp.csv"
+    check_call(['hdfs', 'dfs', '-copyFromLocal', csv_path, tbl_loc])
+    self.run_test_case('QueryTest/select-lazy-timestamp', vector, unique_database)