You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/03/13 22:20:22 UTC

[1/3] impala git commit: IMPALA-6638: Reduce file handle cache lock contention

Repository: impala
Updated Branches:
  refs/heads/master 8283081bf -> 0d7787fe4


IMPALA-6638: Reduce file handle cache lock contention

FileHandleCache::OpenFileHandle() currently holds the
lock while opening a file handle. This lengthens the
duration holding the lock considerably, causing
contention when a lot of file handles are being
opened (i.e. when the cache is cold).

This changes FileHandleCache::OpenFileHandle() drops
the lock while opening the file handle, then
reacquires it to add the file handle to the cache.

When running a simple select on a table with 46801
Parquet files, this fixes a small performance overhead
for the cold cache case compared to having the
cache off. All results are averaged over 5 runs:

File handle cache off: 7.19s
File handle cache cold (no patch): 7.92s
File handle cache cold (with patch): 7.20s

When running a select that accesses and aggregates
4 columns from the same Parquet table (thus requiring
more scan ranges for the same file), the fix reduces
contention, particularly for the case with 8 IO threads
per disk:

1 IO thread per disk:
Without patch: 9.59s
With patch: 8.11s

8 IO threads per disk:
Without patch: 14.2s
With patch: 8.17s

The patch has no impact on the performance when the
cache is hot.

Change-Id: I4c695b21ca556e9c73c703c0c891e64939271c8d
Reviewed-on: http://gerrit.cloudera.org:8080/9576
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3839826e
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3839826e
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3839826e

Branch: refs/heads/master
Commit: 3839826e69e9dff2062464f7eb1bdfed66ca7702
Parents: 8283081
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Sat Mar 10 15:09:58 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 13 02:40:07 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/io/handle-cache.inline.h | 61 +++++++++++++++++-----------
 1 file changed, 37 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3839826e/be/src/runtime/io/handle-cache.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/io/handle-cache.inline.h b/be/src/runtime/io/handle-cache.inline.h
index f0cced0..99b106c 100644
--- a/be/src/runtime/io/handle-cache.inline.h
+++ b/be/src/runtime/io/handle-cache.inline.h
@@ -86,16 +86,22 @@ CachedHdfsFileHandle* FileHandleCache::GetFileHandle(
   // Hash the key and get appropriate partition
   int index = HashUtil::Hash(fname->data(), fname->size(), 0) % cache_partitions_.size();
   FileHandleCachePartition& p = cache_partitions_[index];
-  boost::lock_guard<SpinLock> g(p.lock);
-  pair<typename MapType::iterator, typename MapType::iterator> range =
-    p.cache.equal_range(*fname);
 
   // If this requires a new handle, skip to the creation codepath. Otherwise,
   // find an unused entry with the same mtime
-  FileHandleEntry* ret_elem = nullptr;
   if (!require_new_handle) {
+    boost::lock_guard<SpinLock> g(p.lock);
+    pair<typename MapType::iterator, typename MapType::iterator> range =
+      p.cache.equal_range(*fname);
+
+    // When picking a cached entry, always follow the ordering of the map and
+    // pick earlier entries first. This allows excessive entries for a file
+    // to age out. For example, if there are three entries for a file and only
+    // one is used at a time, only the first will be used and the other two
+    // can age out.
     while (range.first != range.second) {
       FileHandleEntry* elem = &range.first->second;
+      DCHECK(elem->fh.get() != nullptr);
       if (!elem->in_use && elem->fh->mtime() == mtime) {
         // This element is currently in the lru_list, which means that lru_entry must
         // be an iterator pointing into the lru_list.
@@ -104,35 +110,42 @@ CachedHdfsFileHandle* FileHandleCache::GetFileHandle(
         // the lru_list by resetting its iterator to point to the end of the list.
         p.lru_list.erase(elem->lru_entry);
         elem->lru_entry = p.lru_list.end();
-        ret_elem = elem;
         *cache_hit = true;
-        break;
+        elem->in_use = true;
+        return elem->fh.get();
       }
       ++range.first;
     }
   }
 
   // There was no entry that was free or caller asked for a new handle
-  if (!ret_elem) {
-    *cache_hit = false;
-    // Create a new entry and move it into the map
-    CachedHdfsFileHandle* new_fh = new CachedHdfsFileHandle(fs, fname->data(), mtime);
-    if (!new_fh->ok()) {
-      delete new_fh;
-      return nullptr;
-    }
-    FileHandleEntry entry(new_fh, p.lru_list);
-    typename MapType::iterator new_it = p.cache.emplace_hint(range.second,
-        *fname, std::move(entry));
-    ret_elem = &new_it->second;
-    ++p.size;
-    if (p.size > p.capacity) EvictHandles(p);
+  // Opening a file handle requires talking to the NameNode, so construct
+  // the file handle without holding the lock to reduce contention.
+  *cache_hit = false;
+  // Create a new file handle
+  CachedHdfsFileHandle* new_fh = new CachedHdfsFileHandle(fs, fname->data(), mtime);
+  if (!new_fh->ok()) {
+    delete new_fh;
+    return nullptr;
   }
 
-  DCHECK(ret_elem->fh.get() != nullptr);
-  DCHECK(!ret_elem->in_use);
-  ret_elem->in_use = true;
-  return ret_elem->fh.get();
+  // Get the lock and create/move the new entry into the map
+  // This entry is new and will be immediately used. Place it as the first entry
+  // for this file in the multimap. The ordering is largely unimportant if all the
+  // existing entries are in use. However, if require_new_handle is true, there may be
+  // unused entries, so it would make more sense to insert the new entry at the front.
+  boost::lock_guard<SpinLock> g(p.lock);
+  pair<typename MapType::iterator, typename MapType::iterator> range =
+      p.cache.equal_range(*fname);
+  FileHandleEntry entry(new_fh, p.lru_list);
+  typename MapType::iterator new_it = p.cache.emplace_hint(range.first,
+      *fname, std::move(entry));
+  ++p.size;
+  if (p.size > p.capacity) EvictHandles(p);
+  FileHandleEntry* new_elem = &new_it->second;
+  DCHECK(!new_elem->in_use);
+  new_elem->in_use = true;
+  return new_elem->fh.get();
 }
 
 void FileHandleCache::ReleaseFileHandle(std::string* fname,


[2/3] impala git commit: IMPALA-6635: Add DECIMAL type to Kudu predicates

Posted by ta...@apache.org.
IMPALA-6635: Add DECIMAL type to Kudu predicates

This patch enables pushing scan predicates on
DECIMAL columns down to Kudu.

Testing:
- Added Planner decimal predicate test to kudu.test
- Added Planner decimal in-list test to kudu-selectivity.test

Change-Id: I2569a9e1d58f1c58884d58633d46348364888ed7
Reviewed-on: http://gerrit.cloudera.org:8080/9578
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/6d8ce640
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/6d8ce640
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/6d8ce640

Branch: refs/heads/master
Commit: 6d8ce64020ac8a103f03814c2903f6c5835886c0
Parents: 3839826
Author: Grant Henke <gh...@cloudera.com>
Authored: Fri Mar 9 15:12:43 2018 -0600
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 13 20:31:43 2018 +0000

----------------------------------------------------------------------
 .../java/org/apache/impala/planner/KuduScanNode.java |  6 ++++++
 .../queries/PlannerTest/kudu-selectivity.test        | 14 ++++++++++++++
 .../functional-planner/queries/PlannerTest/kudu.test | 15 +++++++++++++++
 3 files changed, 35 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/6d8ce640/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
index a02b49b..f60d519 100644
--- a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
@@ -406,6 +406,11 @@ public class KuduScanNode extends ScanNode {
         }
         break;
       }
+      case DECIMAL: {
+        kuduPredicate = KuduPredicate.newComparisonPredicate(column, op,
+            ((NumericLiteral)literal).getValue());
+        break;
+      }
       default: break;
     }
     if (kuduPredicate == null) return false;
@@ -505,6 +510,7 @@ public class KuduScanNode extends ScanNode {
         }
         break;
       }
+      case DECIMAL: return ((NumericLiteral) e).getValue();
       default:
         Preconditions.checkState(false,
             "Unsupported Kudu type considered for predicate: %s", e.getType().toSql());

http://git-wip-us.apache.org/repos/asf/impala/blob/6d8ce640/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
index e6c8422..cf3014f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
@@ -121,6 +121,7 @@ Per-Host Resources: mem-estimate=0B mem-reservation=0B
      mem-estimate=0B mem-reservation=0B
      tuple-ids=0 row-size=124B cardinality=3
 ====
+# Test alltypes in-list predicates.
 select * from functional_kudu.alltypes where
 -- predicates that can be pushed
 tinyint_col in (1, 2) and
@@ -156,6 +157,19 @@ Per-Host Resources: mem-estimate=0B mem-reservation=0B
      mem-estimate=0B mem-reservation=0B
      tuple-ids=0 row-size=97B cardinality=5
 ====
+# Test decimal in-list predicates.
+select * from functional_kudu.decimal_tbl where d1 in (1234, 12345);
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+Per-Host Resources: mem-estimate=0B mem-reservation=0B
+  PLAN-ROOT SINK
+  |  mem-estimate=0B mem-reservation=0B
+  |
+  00:SCAN KUDU [functional_kudu.decimal_tbl]
+     kudu predicates: d1 IN (1234, 12345)
+     mem-estimate=0B mem-reservation=0B
+     tuple-ids=0 row-size=56B cardinality=2
+====
 select * from functional_kudu.alltypes where
 tinyint_col is not null and
 smallint_col is null and

http://git-wip-us.apache.org/repos/asf/impala/blob/6d8ce640/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index 354a6fa..57a08dc 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -535,3 +535,18 @@ INSERT INTO KUDU [functional_kudu.jointbl]
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
 ====
+# Decimal predicate.
+select * from functional_kudu.decimal_tbl where d4 = 0.123456789;
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN KUDU [functional_kudu.decimal_tbl]
+   kudu predicates: d4 = 0.123456789
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN KUDU [functional_kudu.decimal_tbl]
+   kudu predicates: d4 = 0.123456789
+====
\ No newline at end of file


[3/3] impala git commit: IMPALA-5315: Cast to timestamp fails for YYYY-M-D format

Posted by ta...@apache.org.
IMPALA-5315: Cast to timestamp fails for YYYY-M-D format

This change allows casting of a string in 'lazy' date/time
format to timestamp. The supported lazy date formats are:
  yyyy-[M]M-[d]d
  yyyy-[M]M-[d]d [H]H:[m]m:[s]s[.SSSSSSSSS]
  [H]H:[m]m:[s]s[.SSSSSSSSS]

We will incur a SCAN performance penalty (approximately 1/2
TotalReadThroughput) when the string is in one of these
lazy date/time format.

Testing:
Benchmarked the performance consequence by executing this SQL on
a private build over 3.8 billion rows:
select min(cast (time_string as timestamp)) from private.impala_5315

Added tests for valid and invalid date/time format strings
in expr-test.cc to be inline with existing tests for CAST() function.

Added end-to-end tests into exprs.test and
select-lazy-timestamp.test to exercise the new function within
the context of a query.

Added tests to exercise the leading and trailing white space trimming
behaviour in default and lazy date/time string format (IMPALA-6630).

Change-Id: Ib9a184a09d7e7783f04d47588537612c2ecec28f
Reviewed-on: http://gerrit.cloudera.org:8080/7009
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0d7787fe
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0d7787fe
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0d7787fe

Branch: refs/heads/master
Commit: 0d7787fe4df1ab2bb8231b0ee0912e3cf2787f9e
Parents: 6d8ce64
Author: Vincent Tran <vt...@cloudera.com>
Authored: Sat May 27 03:02:19 2017 -0400
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 13 22:10:18 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                       | 117 ++++++++++++-
 be/src/runtime/timestamp-parse-util.cc          | 168 ++++++++++++++++++-
 be/src/runtime/timestamp-parse-util.h           |  29 ++++
 testdata/data/lazy_timestamp.csv                |  13 ++
 .../queries/QueryTest/exprs.test                |  36 ++++
 .../QueryTest/select-lazy-timestamp.test        |  20 +++
 tests/query_test/test_scanners.py               |  22 +++
 7 files changed, 398 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index e02ff6a..bd25328 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3051,6 +3051,121 @@ TEST_F(ExprTest, CastExprs) {
   TestStringValue("cast(cast(cast('2012-01-01 09:10:11.123456789' as timestamp) as"
       " timestamp) as string)", "2012-01-01 09:10:11.123456789");
 
+  // Test casting of lazy date and/or time format string to timestamp
+  TestTimestampValue(
+      "cast('2001-1-2' as timestamp)", TimestampValue::Parse("2001-01-02 00:00:00"));
+  TestTimestampValue(
+      "cast('2001-01-3' as timestamp)", TimestampValue::Parse("2001-01-03 00:00:00"));
+  TestTimestampValue(
+      "cast('2001-1-21' as timestamp)", TimestampValue::Parse("2001-01-21 00:00:00"));
+  TestTimestampValue("cast('2001-1-21 12:5:30' as timestamp)",
+      TimestampValue::Parse("2001-01-21 12:05:30"));
+  TestTimestampValue("cast('2001-1-21 13:5:05' as timestamp)",
+      TimestampValue::Parse("2001-01-21 13:05:05"));
+  TestTimestampValue("cast('2001-1-21 1:2:3' as timestamp)",
+      TimestampValue::Parse("2001-01-21 01:02:03"));
+  TestTimestampValue("cast('2001-1-21 1:5:31.12345' as timestamp)",
+      TimestampValue::Parse("2001-01-21 01:05:31.123450000"));
+  TestTimestampValue("cast('2001-1-21 1:5:31.12345678910111213' as timestamp)",
+      TimestampValue::Parse("2001-01-21 01:05:31.123456789"));
+  TestTimestampValue(
+      "cast('1:05:1.12' as timestamp)", TimestampValue::Parse("01:05:01.120000000"));
+  TestTimestampValue("cast('1:05:1' as timestamp)", TimestampValue::Parse("01:05:01"));
+  TestTimestampValue("cast('        2001-01-9 1:05:1        ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestIsNull("cast('2001-1-21     11:2:3' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-6' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('01-1-21' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-21 12:5:3 AM' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1:05:31.123456foo' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('10/feb/10' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-foo1-2bar' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909/1-/2' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-1-2 12:32:1.111bar' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:1.111.111.2' as timestamp)", TYPE_TIMESTAMP);
+
+  // Test various ways of truncating a "lazy" format to produce an invalid timestamp.
+  TestIsNull("cast('1909-10-2 12:32:1.' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:11.' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:11. ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32: ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 1:32:' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 1:2:' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 1:2' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 1:2 ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12 ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 2' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10- ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909' as timestamp)", TYPE_TIMESTAMP);
+
+  // Test missing number from format.
+  TestIsNull("cast('1909-10-2 12:32:.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12::1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 :32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10- 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909--2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('-10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+
+  // Test duplicate separators - should return NULL because not a valid format.
+  TestIsNull("cast('1909--10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10--2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12::32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32::1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:1..9999' as timestamp)", TYPE_TIMESTAMP);
+
+  // Test numbers with too many digits in date/time - should return NULL because not a
+  // valid timestamp.
+  TestIsNull("cast('19097-10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-107-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-277 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 127:32:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:327:1.9999' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('1909-10-2 12:32:177.9999' as timestamp)", TYPE_TIMESTAMP);
+
+  // IMPALA-6630: Test whitespace trimming mechanism when cast from string to timestamp
+  TestTimestampValue("cast(' \t\r\n 2001-01-09 01:05:01.123456789 \t\r\n' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456789"));
+  TestTimestampValue("cast(' \t\r\n 2001-01-09T01:05:01.123456789 \t\r\n' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456789"));
+  TestTimestampValue("cast('  \t\r\n      2001-01-09 01:05:01   \t\r\n  ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('  \t\r\n      2001-01-09T01:05:01   \t\r\n  ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('  \t\r\n      2001-01-09   \t\r\n     ' as timestamp)",
+      TimestampValue::Parse("2001-01-09"));
+  TestTimestampValue("cast('  \t\r\n      01:05:01   \t\r\n     ' as timestamp)",
+      TimestampValue::Parse("01:05:01"));
+  TestTimestampValue("cast(' \t\r\n 01:05:01.123456789   \t\r\n     ' as timestamp)",
+      TimestampValue::Parse("01:05:01.123456789"));
+  TestTimestampValue("cast('  \t\r\n      2001-1-9 1:5:1    \t\r\n    ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('  \t\r\n  2001-1-9 1:5:1.12345678  \t\r\n ' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456780"));
+  TestTimestampValue("cast('  \t\r\n      1:5:1    \t\r\n    ' as timestamp)",
+      TimestampValue::Parse("01:05:01"));
+  TestTimestampValue("cast('  \t\r\n      1:5:1.12345678    \t\r\n    ' as timestamp)",
+      TimestampValue::Parse("01:05:01.123456780"));
+  TestTimestampValue("cast('  \t\r\n      2001-1-9    \t\r\n    ' as timestamp)",
+      TimestampValue::Parse("2001-01-09"));
+
+  // Test invalid whitespace locations in strings to be casted to timestamp
+  TestIsNull(
+      "cast(' \t\r\n  2001-01-09      01:05:01  \t\r\n ' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09   01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09\t01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09\r01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09\n01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-9    1:5:1' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-9\t1:5:1' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-9\r1:5:1' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-1-9\n1:5:1' as timestamp)", TYPE_TIMESTAMP);
+
   // IMPALA-3163: Test precise conversion from Decimal to Timestamp.
   TestTimestampValue("cast(cast(1457473016.1230 as decimal(17,4)) as timestamp)",
       TimestampValue::Parse("2016-03-08 21:36:56.123000000", 29));
@@ -5984,7 +6099,7 @@ TEST_F(ExprTest, TimestampFunctions) {
   TestIsNull("timestamp_cmp('','1966-05-04 15:33:45')", TYPE_INT);
   TestIsNull("timestamp_cmp(NULL,'1966-05-04 15:33:45')", TYPE_INT);
   // Invalid timestamp test case
-  TestIsNull("timestamp_cmp('1966-5-4 5:33:45','1966-5-4 15:33:45')", TYPE_INT);
+  TestIsNull("timestamp_cmp('1966-5-4 50:33:45','1966-5-4 15:33:45')", TYPE_INT);
 
   TestValue("int_months_between('1967-07-19','1966-06-04')", TYPE_INT, 13);
   TestValue("int_months_between('1966-06-04 16:34:45','1967-07-19 15:33:46')",

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/runtime/timestamp-parse-util.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index e64d904..c444214 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -17,6 +17,8 @@
 
 #include "runtime/timestamp-parse-util.h"
 
+#include <algorithm>
+
 #include <boost/assign/list_of.hpp>
 #include <boost/date_time/gregorian/gregorian.hpp>
 #include <boost/unordered_map.hpp>
@@ -214,8 +216,150 @@ bool TimestampParser::ParseFormatTokens(DateTimeFormatContext* dt_ctx) {
   return dt_ctx->has_date_toks || dt_ctx->has_time_toks;
 }
 
+const char* TimestampParser::ParseDigitToken(const char* str, const char* str_end) {
+  const char* tok_end = str;
+  while (tok_end < str_end) {
+    if (!isdigit(*tok_end)) return tok_end;
+    ++tok_end;
+  }
+  return tok_end;
+}
+
+const char* TimestampParser::ParseSeparatorToken(
+    const char* str, const char* str_end, const char sep) {
+  const char* tok_end = str;
+  while (tok_end < str_end) {
+    if (*tok_end != sep) return tok_end;
+    ++tok_end;
+  }
+  return tok_end;
+}
+
+bool TimestampParser::ParseFormatTokensByStr(DateTimeFormatContext* dt_ctx) {
+  DCHECK(dt_ctx != NULL);
+  DCHECK(dt_ctx->fmt != NULL);
+  DCHECK_GT(dt_ctx->fmt_len, 0);
+  DCHECK_EQ(dt_ctx->toks.size(), 0);
+  const char* str_begin = dt_ctx->fmt;
+  const char* str_end = str_begin + dt_ctx->fmt_len;
+  const char* str = str_begin;
+  const char* tok_end;
+
+  // Parse the 4-digit year
+  tok_end = ParseDigitToken(str, str_end);
+  if (tok_end - str == 4) {
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(YEAR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Check for the date separator '-'
+    tok_end = ParseSeparatorToken(str, str_end, '-');
+    if (tok_end - str != 1) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Parse the 1 or 2 digit month.
+    tok_end = ParseDigitToken(str, str_end);
+    if (tok_end - str != 1 && tok_end - str != 2) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(MONTH_IN_YEAR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Check for the date separator '-'
+    tok_end = ParseSeparatorToken(str, str_end, '-');
+    if (tok_end - str != 1) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Parse the 1 or 2 digit day in month
+    tok_end = ParseDigitToken(str, str_end);
+    if (tok_end - str != 1 && tok_end - str != 2) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(DAY_IN_MONTH, str - str_begin, tok_end - str, str));
+    str = tok_end;
+    dt_ctx->has_date_toks = true;
+
+    // If the string ends here, we only have a date component
+    if (str == str_end) return true;
+
+    // Check for the space between date and time component
+    tok_end = ParseSeparatorToken(str, str_end, ' ');
+    if (tok_end - str != 1) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Invalid format if date-time separator is not followed by more digits
+    if (str > str_end) return false;
+    tok_end = ParseDigitToken(str, str_end);
+  }
+
+  // Parse the 1 or 2 digit hour
+  if (tok_end - str != 1 && tok_end - str != 2) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(HOUR_IN_DAY, str - str_begin, tok_end - str, str));
+  str = tok_end;
+
+  // Check for the time component separator ':'
+  tok_end = ParseSeparatorToken(str, str_end, ':');
+  if (tok_end - str != 1) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+  str = tok_end;
+
+  // Parse the 1 or 2 digit minute
+  tok_end = ParseDigitToken(str, str_end);
+  if (tok_end - str != 1 && tok_end - str != 2) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(MINUTE_IN_HOUR, str - str_begin, tok_end - str, str));
+  str = tok_end;
+
+  // Check for the time component separator ':'
+  tok_end = ParseSeparatorToken(str, str_end, ':');
+  if (tok_end - str != 1) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+  str = tok_end;
+
+  // Parse the 1 or 2 digit second
+  tok_end = ParseDigitToken(str, str_end);
+  if (tok_end - str != 1 && tok_end - str != 2) return false;
+  dt_ctx->toks.push_back(
+      DateTimeFormatToken(SECOND_IN_MINUTE, str - str_begin, tok_end - str, str));
+  str = tok_end;
+  dt_ctx->has_time_toks = true;
+
+  // There is more to parse, there maybe a fractional component.
+  if (str < str_end) {
+    tok_end = ParseSeparatorToken(str, str_end, '.');
+    if (tok_end - str != 1) return false;
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
+    str = tok_end;
+
+    // Invalid format when there is no fractional component following '.'
+    if (str > str_end) return false;
+
+    // Parse the fractional component.
+    // Like the non-lazy path, this will parse up to 9 fractional digits
+    tok_end = ParseDigitToken(str, str_end);
+    int num_digits = std::min<int>(9, tok_end - str);
+    dt_ctx->toks.push_back(
+        DateTimeFormatToken(FRACTION, str - str_begin, num_digits, str));
+    str = tok_end;
+
+    // Invalid format if there is more to parse after the fractional component
+    if (str < str_end) return false;
+  }
+  return true;
+}
+
 bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
     boost::posix_time::time_duration* t) {
+  int lazy_len;
+
   DCHECK(TimestampParser::initialized_);
   DCHECK(d != NULL);
   DCHECK(t != NULL);
@@ -248,6 +392,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
     }
   }
 
+  lazy_len = len;
   // Only process what we have to.
   if (len > DEFAULT_DATE_TIME_FMT_LEN) len = DEFAULT_DATE_TIME_FMT_LEN;
   // Determine the default formatting context that's required for parsing.
@@ -278,7 +423,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
           // There is likely a fractional component that's below the expected 9 chars.
           // We will need to work out which default context to use that corresponds to
           // the fractional length in the string.
-          if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN)) {
+          if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN) && LIKELY(str[19] == '.')) {
             switch (str[10]) {
               case ' ': {
                 dt_ctx =
@@ -295,7 +440,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
           break;
         }
       }
-    } else if (str[2] == ':') {
+    } else if (str[2] == ':' && str[5] == ':' && isdigit(str[7])) {
       if (len > DEFAULT_TIME_FRAC_FMT_LEN) len = DEFAULT_TIME_FRAC_FMT_LEN;
       if (len > DEFAULT_TIME_FMT_LEN && str[8] == '.') {
         dt_ctx = &DEFAULT_TIME_FRAC_CTX[len - DEFAULT_TIME_FMT_LEN - 1];
@@ -304,12 +449,23 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
       }
     }
   }
-  if (LIKELY(dt_ctx != NULL)) {
+
+  // Generating context lazily as a fall back if default formats fail.
+  // ParseFormatTokenByStr() does not require a template format string.
+  if (dt_ctx != nullptr) {
     return Parse(str, len, *dt_ctx, d, t);
   } else {
-    *d = boost::gregorian::date();
-    *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
-    return false;
+    DateTimeFormatContext lazy_ctx;
+    lazy_ctx.Reset(str, lazy_len);
+    if (ParseFormatTokensByStr(&lazy_ctx)) {
+      dt_ctx = &lazy_ctx;
+      len = lazy_len;
+      return Parse(str, len, *dt_ctx, d, t);
+    } else {
+      *d = boost::gregorian::date();
+      *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
+      return false;
+    }
   }
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/runtime/timestamp-parse-util.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h
index bbcc03f..bccf0b7 100644
--- a/be/src/runtime/timestamp-parse-util.h
+++ b/be/src/runtime/timestamp-parse-util.h
@@ -177,6 +177,35 @@ class TimestampParser {
   /// Return true if the parse was successful.
   static bool ParseFormatTokens(DateTimeFormatContext* dt_ctx);
 
+  // Parse out the next digit token from the date/time string by checking for contiguous
+  // digit characters and return a pointer to the end of that token.
+  // str -- pointer to the string to be parsed
+  // str_end -- the pointer to the end of the string to be parsed
+  // Returns the pointer within the string to the end of the valid digit token.
+  static const char* ParseDigitToken(const char* str, const char* str_end);
+
+  // Parse out the next separator token from the date/time string against an expected
+  // character.
+  // str -- pointer to the string to be parsed
+  // str_end -- the pointer to the end of the string to be parsed
+  // sep -- the separator char to compare the token to
+  // Returns the pointer within the string to the end of the valid separator token.
+  static const char* ParseSeparatorToken(
+      const char* str, const char* str_end, const char sep);
+
+  /// Parse the date/time string to generate the DateTimeFormatToken required by
+  /// DateTimeFormatContext. Similar to ParseFormatTokens() this function will take the
+  /// string and length, then heuristically determine whether the value contains date
+  //  tokens, time tokens, or both. Unlike ParseFormatTokens, it does not require the
+  //  template format string.
+  /// str -- valid pointer to the string to parse
+  /// len -- length of the string to parse (must be > 0)
+  /// dt_ctx -- date/time format context (must contain valid tokens)
+  /// d -- the date value where the results of the parsing will be placed
+  /// t -- the time value where the results of the parsing will be placed
+  /// Returns true if the date/time was successfully parsed.
+  static bool ParseFormatTokensByStr(DateTimeFormatContext* dt_ctx);
+
   /// Parse a default date/time string. The default timestamp format is:
   /// yyyy-MM-dd HH:mm:ss.SSSSSSSSS or yyyy-MM-ddTHH:mm:ss.SSSSSSSSS. Either just the
   /// date or just the time may be specified. All components are required in either the

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/data/lazy_timestamp.csv
----------------------------------------------------------------------
diff --git a/testdata/data/lazy_timestamp.csv b/testdata/data/lazy_timestamp.csv
new file mode 100644
index 0000000..d44db1d
--- /dev/null
+++ b/testdata/data/lazy_timestamp.csv
@@ -0,0 +1,13 @@
+2001-1-2
+2001-1-02
+2001-01-2
+1:6:8
+01:6:8
+1:06:8
+1:6:08
+1:6:8.123456789101112
+1:6:8.123456789
+1:6:8.12345
+2001-1-2 1:6:8
+2001-1-2 1:6:8.123456
+2001-1-2 1:6:8.123456789101112

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index b3d0ca8..b6909c1 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -2959,3 +2959,39 @@ from functional.alltypes where id = 7
 ---- TYPES
 BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT
 ====
+---- QUERY
+# IMPALA-5315: Test support for non zero-padded date/time strings cast as timestamp
+select cast('2001-1-21 12:5:30' as timestamp)
+---- RESULTS
+2001-01-21 12:05:30
+---- TYPES
+timestamp
+====
+---- QUERY
+select cast('2001-1-2 1:5:3.123' as timestamp)
+---- RESULTS
+2001-01-02 01:05:03.123000000
+---- TYPES
+timestamp
+====
+---- QUERY
+select cast('1:5:3' as timestamp)
+---- RESULTS
+01:05:03
+---- TYPES
+timestamp
+====
+---- QUERY
+select cast('1:5:3.1234567' as timestamp)
+---- RESULTS
+01:05:03.123456700
+---- TYPES
+timestamp
+====
+---- QUERY
+select cast('2001-1-2' as timestamp)
+---- RESULTS
+2001-01-02 00:00:00
+---- TYPES
+timestamp
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test b/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test
new file mode 100644
index 0000000..8258072
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test
@@ -0,0 +1,20 @@
+====
+---- QUERY
+select ts from lazy_ts
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+2001-01-02 00:00:00
+2001-01-02 00:00:00
+2001-01-02 00:00:00
+01:06:08
+01:06:08
+01:06:08
+01:06:08
+01:06:08.123456789
+01:06:08.123456789
+01:06:08.123450000
+2001-01-02 01:06:08
+2001-01-02 01:06:08.123456000
+2001-01-02 01:06:08.123456789
+---- TYPES
+timestamp
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/tests/query_test/test_scanners.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index a67b793..a9ba5b8 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -855,3 +855,25 @@ class TestScanTruncatedFiles(ImpalaTestSuite):
     result = self.execute_query("select count(*) from %s" % fq_tbl_name)
     assert(len(result.data) == 1)
     assert(result.data[0] == str(num_rows))
+
+class TestUncompressedText(ImpalaTestSuite):
+  @classmethod
+  def get_workload(cls):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestUncompressedText, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format == 'text' and
+        v.get_value('table_format').compression_codec == 'none')
+
+  # IMPALA-5315: Test support for date/time in unpadded format
+  def test_scan_lazy_timestamp(self, vector, unique_database):
+    self.client.execute(("""CREATE TABLE {0}.lazy_ts (ts TIMESTAMP)""").format
+          (unique_database))
+    tbl_loc = get_fs_path("/test-warehouse/%s.db/%s" % (unique_database,
+          "lazy_ts"))
+    check_call(['hdfs', 'dfs', '-copyFromLocal', os.environ['IMPALA_HOME'] +
+          "/testdata/data/lazy_timestamp.csv", tbl_loc])
+    self.run_test_case('QueryTest/select-lazy-timestamp', vector, unique_database)