You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jb...@apache.org on 2017/09/07 03:50:23 UTC

[5/8] incubator-impala git commit: IMPALA-5317: add DATE_TRUNC() function

IMPALA-5317: add DATE_TRUNC() function

Added a UDF builtin function date_trunc.
Reuse many of the Trunc functions implemented already for trunc() including
truncate unit and except strToTruncUnit
Added checks to ensure that truncation results that fall outside of
posix timestamp range are returned as NULL.
Added ctest for the date_trunc function.

Change-Id: I953ba006cbb166dcc78e8c0c12dfbf70f093b584
Reviewed-on: http://gerrit.cloudera.org:8080/7313
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/f538b439
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/f538b439
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/f538b439

Branch: refs/heads/master
Commit: f538b43911eb4bdc4ce5269038f0011e3e83a3de
Parents: b1edaf2
Author: Sandeep Akinapelli <sa...@cloudera.com>
Authored: Fri Jun 16 11:56:10 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 7 01:29:01 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                    | 115 ++++++++-
 be/src/exprs/udf-builtins-ir.cc              |   5 +
 be/src/exprs/udf-builtins.cc                 | 272 ++++++++++++++++++----
 be/src/exprs/udf-builtins.h                  |  32 ++-
 common/function-registry/impala_functions.py |   4 +
 5 files changed, 382 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f538b439/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index e04abc5..3899a7a 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -7300,9 +7300,122 @@ TEST_F(ExprTest, UuidTest) {
   EXPECT_TRUE(string_set.size() == NUM_UUIDS);
 }
 
+TEST_F(ExprTest, DateTruncTest) {
+  TestTimestampValue("date_trunc('MILLENNIUM', '2016-05-08 10:30:00')",
+      TimestampValue::Parse("2000-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MILLENNIUM', '2000-01-01 00:00:00')",
+      TimestampValue::Parse("2000-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('CENTURY', '2016-05-08 10:30:00')",
+      TimestampValue::Parse("2000-01-01 00:00:00  "));
+  TestTimestampValue("date_trunc('CENTURY', '2116-05-08 10:30:00')",
+      TimestampValue::Parse("2100-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('DECADE', '2116-05-08 10:30:00')",
+      TimestampValue::Parse("2110-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('YEAR', '2016-05-08 10:30:00')",
+      TimestampValue::Parse("2016-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MONTH', '2016-05-08 00:00:00')",
+      TimestampValue::Parse("2016-05-01 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '2116-05-08 10:30:00')",
+      TimestampValue::Parse("2116-05-04 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '2017-01-01 10:37:03.455722111')",
+      TimestampValue::Parse("2016-12-26 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '2017-01-02 10:37:03.455722111')",
+      TimestampValue::Parse("2017-01-02 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '2017-01-07 10:37:03.455722111')",
+      TimestampValue::Parse("2017-01-02 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '2017-01-08 10:37:03.455722111')",
+      TimestampValue::Parse("2017-01-02 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '2017-01-09 10:37:03.455722111')",
+      TimestampValue::Parse("2017-01-09 00:00:00"));
+  TestTimestampValue("date_trunc('DAY', '1416-05-08 10:37:03.455722111')",
+      TimestampValue::Parse("1416-05-08 00:00:00"));
+
+  TestTimestampValue("date_trunc('HOUR', '1416-05-08 10:30:03.455722111')",
+      TimestampValue::Parse("1416-05-08 10:00:00"));
+  TestTimestampValue("date_trunc('HOUR', '1416-05-08 23:30:03.455722111')",
+      TimestampValue::Parse("1416-05-08 23:00:00"));
+  TestTimestampValue("date_trunc('MINUTE', '1416-05-08 10:37:03.455722111')",
+      TimestampValue::Parse("1416-05-08 10:37:00"));
+  TestTimestampValue("date_trunc('SECOND', '1416-05-08 10:37:03.455722111')",
+      TimestampValue::Parse("1416-05-08 10:37:03"));
+  TestTimestampValue("date_trunc('MILLISECONDS', '1416-05-08 10:37:03.455722111')",
+      TimestampValue::Parse("1416-05-08 10:37:03.455000000"));
+  TestTimestampValue("date_trunc('MICROSECONDS', '1416-05-08 10:37:03.455722111')",
+      TimestampValue::Parse("1416-05-08 10:37:03.455722000"));
+
+  // Test corner cases.
+  TestTimestampValue("date_trunc('MILLENNIUM', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9000-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('CENTURY', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9900-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('DECADE', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9990-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('YEAR', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MONTH', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-12-01 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-12-27 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '1400-01-06 23:59:59.999999999')",
+      TimestampValue::Parse("1400-01-06 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '1400-01-07 23:59:59.999999999')",
+      TimestampValue::Parse("1400-01-06 00:00:00"));
+  TestTimestampValue("date_trunc('DAY', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-12-31 00:00:00"));
+  TestTimestampValue("date_trunc('HOUR', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-12-31 23:00:00"));
+  TestTimestampValue("date_trunc('MINUTE', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-12-31 23:59:00"));
+  TestTimestampValue("date_trunc('SECOND', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-12-31 23:59:59"));
+  TestTimestampValue("date_trunc('MILLISECONDS', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-12-31 23:59:59.999"));
+  TestTimestampValue("date_trunc('MICROSECONDS', '9999-12-31 23:59:59.999999999')",
+      TimestampValue::Parse("9999-12-31 23:59:59.999999"));
+
+  TestTimestampValue("date_trunc('CENTURY', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('DECADE', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('YEAR', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MONTH', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('DAY', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('HOUR', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MINUTE', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('SECOND', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MILLISECONDS', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MICROSECONDS', '1400-01-01 00:00:00')",
+      TimestampValue::Parse("1400-01-01 00:00:00"));
+
+  // valid input with invalid output
+  TestIsNull("date_trunc('MILLENNIUM', '1416-05-08 10:30:00')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('MILLENNIUM', '1999-12-31 11:59:59.999999')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('WEEK', '1400-01-01 00:00:00')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('WEEK', '1400-01-05 00:00:00')", TYPE_TIMESTAMP);
+
+  // Test invalid input.
+  TestIsNull("date_trunc('HOUR', '12202010')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('HOUR', '')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('HOUR', NULL)", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('HOUR', '02-13-2014')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('CENTURY', '16-05-08 10:30:00')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('CENTURY', '1116-05-08 10:30:00')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('DAY', '00:00:00')", TYPE_TIMESTAMP);
+  TestError("date_trunc('YsEAR', '2016-05-08 10:30:00')");
+  TestError("date_trunc('D', '2116-05-08 10:30:00')");
+  TestError("date_trunc('2017-01-09', '2017-01-09 10:37:03.455722111' )");
+  TestError("date_trunc('2017-01-09 10:00:00', 'HOUR')");
+}
 } // namespace impala
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
   InitCommonRuntime(argc, argv, true, TestInfo::BE_TEST);
   ABORT_IF_ERROR(TimezoneDatabase::Initialize());

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f538b439/be/src/exprs/udf-builtins-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/udf-builtins-ir.cc b/be/src/exprs/udf-builtins-ir.cc
index b02a422..03b88ba 100644
--- a/be/src/exprs/udf-builtins-ir.cc
+++ b/be/src/exprs/udf-builtins-ir.cc
@@ -108,6 +108,11 @@ TimestampVal UdfBuiltins::Trunc(FunctionContext* context, const TimestampVal& tv
   return TruncImpl(context, tv, unit_str);
 }
 
+TimestampVal UdfBuiltins::DateTrunc(
+    FunctionContext* context, const StringVal& unit_str, const TimestampVal& tv) {
+  return DateTruncImpl(context, tv, unit_str);
+}
+
 // Maps the user facing name of a unit to a TExtractField
 // Returns the TExtractField for the given unit
 TExtractField::type StrToExtractField(FunctionContext* ctx, const StringVal& unit_str) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f538b439/be/src/exprs/udf-builtins.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/udf-builtins.cc b/be/src/exprs/udf-builtins.cc
index 4662da0..01178e3 100644
--- a/be/src/exprs/udf-builtins.cc
+++ b/be/src/exprs/udf-builtins.cc
@@ -32,23 +32,31 @@ using boost::gregorian::date;
 using boost::gregorian::date_duration;
 using boost::posix_time::ptime;
 using boost::posix_time::time_duration;
+using boost::posix_time::milliseconds;
+using boost::posix_time::microseconds;
 using namespace impala;
 using namespace strings;
 
 // The units which can be used when Truncating a Timestamp
-struct TruncUnit {
-  enum Type {
-    UNIT_INVALID,
-    YEAR,
-    QUARTER,
-    MONTH,
-    WW,
-    W,
-    DAY,
-    DAY_OF_WEEK,
-    HOUR,
-    MINUTE
-  };
+enum class TruncUnit {
+  UNIT_INVALID,
+  YEAR,
+  QUARTER,
+  MONTH,
+  WW,
+  W,
+  DAY,
+  DAY_OF_WEEK,
+  HOUR,
+  MINUTE,
+  // Below units are only used by DateTrunc
+  MILLENNIUM,
+  CENTURY,
+  DECADE,
+  WEEK,
+  SECOND,
+  MILLISECONDS,
+  MICROSECONDS,
 };
 
 // Put non-exported functions in anonymous namespace to encourage inlining.
@@ -68,13 +76,48 @@ date GoBackToWeekday(const date& orig_date, int week_day) {
   return orig_date - date_duration(7 + diff);
 }
 
+// Maps the user facing name of a unit to a TruncUnit used by DateTrunc function
+// Returns the TruncUnit for the given string
+TruncUnit StrToDateTruncUnit(FunctionContext* ctx, const StringVal& unit_str) {
+  StringVal unit = UdfBuiltins::Lower(ctx, unit_str);
+  if (UNLIKELY(unit.is_null)) return TruncUnit::UNIT_INVALID;
+
+  if (unit == "millennium") {
+    return TruncUnit::MILLENNIUM;
+  } else if (unit == "century") {
+    return TruncUnit::CENTURY;
+  } else if (unit == "decade") {
+    return TruncUnit::DECADE;
+  } else if (unit == "year") {
+    return TruncUnit::YEAR;
+  } else if (unit == "month") {
+    return TruncUnit::MONTH;
+  } else if (unit == "week") {
+    return TruncUnit::WEEK;
+  } else if (unit == "day") {
+    return TruncUnit::DAY;
+  } else if (unit == "hour") {
+    return TruncUnit::HOUR;
+  } else if (unit == "minute") {
+    return TruncUnit::MINUTE;
+  } else if (unit == "second") {
+    return TruncUnit::SECOND;
+  } else if (unit == "milliseconds") {
+    return TruncUnit::MILLISECONDS;
+  } else if (unit == "microseconds") {
+    return TruncUnit::MICROSECONDS;
+  } else {
+    return TruncUnit::UNIT_INVALID;
+  }
+}
+
 // Maps the user facing name of a unit to a TruncUnit
 // Returns the TruncUnit for the given string
-TruncUnit::Type StrToTruncUnit(FunctionContext* ctx, const StringVal& unit_str) {
+TruncUnit StrToTruncUnit(FunctionContext* ctx, const StringVal& unit_str) {
   StringVal unit = UdfBuiltins::Lower(ctx, unit_str);
   if (UNLIKELY(unit.is_null)) return TruncUnit::UNIT_INVALID;
-  if ((unit == "syyyy") || (unit == "yyyy") || (unit == "year") || (unit == "syear") ||
-      (unit == "yyy") || (unit == "yy") || (unit == "y")) {
+  if ((unit == "syyyy") || (unit == "yyyy") || (unit == "year") || (unit == "syear")
+      || (unit == "yyy") || (unit == "yy") || (unit == "y")) {
     return TruncUnit::YEAR;
   } else if (unit == "q") {
     return TruncUnit::QUARTER;
@@ -97,6 +140,27 @@ TruncUnit::Type StrToTruncUnit(FunctionContext* ctx, const StringVal& unit_str)
   }
 }
 
+// Truncate to first day of Milllenium
+TimestampValue TruncMillenium(const date& orig_date) {
+  date new_date(orig_date.year() / 1000 * 1000, 1, 1);
+  time_duration new_time(0, 0, 0, 0);
+  return TimestampValue(new_date, new_time);
+}
+
+// Truncate to first day of century
+TimestampValue TruncCentury(const date& orig_date) {
+  date new_date(orig_date.year() / 100 * 100, 1, 1);
+  time_duration new_time(0, 0, 0, 0);
+  return TimestampValue(new_date, new_time);
+}
+
+// Truncate to first day of decade
+TimestampValue TruncDecade(const date& orig_date) {
+  date new_date(orig_date.year() / 10 * 10, 1, 1);
+  time_duration new_time(0, 0, 0, 0);
+  return TimestampValue(new_date, new_time);
+}
+
 // Truncate to first day of year
 TimestampValue TruncYear(const date& orig_date) {
   date new_date(orig_date.year(), 1, 1);
@@ -119,6 +183,14 @@ TimestampValue TruncMonth(const date& orig_date) {
   return TimestampValue(new_date, new_time);
 }
 
+// Truncate to first day of the week (monday)
+TimestampValue TruncWeek(const date& orig_date) {
+  // ISO-8601 week starts on monday. go back to monday
+  date new_date = GoBackToWeekday(orig_date, 1);
+  time_duration new_time(0, 0, 0, 0);
+  return TimestampValue(new_date, new_time);
+}
+
 // Same day of the week as the first day of the year
 TimestampValue TruncWW(const date& orig_date) {
   const date& first_day_of_year = TruncYear(orig_date).date();
@@ -160,37 +232,53 @@ TimestampValue TruncMinute(const date& orig_date, const time_duration& orig_time
   time_duration new_time(orig_time.hours(), orig_time.minutes(), 0, 0);
   return TimestampValue(orig_date, new_time);
 }
+
+// Truncate parts of seconds
+TimestampValue TruncSecond(const date& orig_date, const time_duration& orig_time) {
+  time_duration new_time(orig_time.hours(), orig_time.minutes(), orig_time.seconds());
+  return TimestampValue(orig_date, new_time);
 }
 
-TimestampVal UdfBuiltins::TruncImpl(FunctionContext* context, const TimestampVal& tv,
-    const StringVal &unit_str) {
-  if (tv.is_null) return TimestampVal::null();
-  const TimestampValue& ts = TimestampValue::FromTimestampVal(tv);
-  const date& orig_date = ts.date();
-  const time_duration& orig_time = ts.time();
+// Truncate parts of milliseconds
+TimestampValue TruncMilliSeconds(const date& orig_date, const time_duration& orig_time) {
+  time_duration new_time(orig_time.hours(), orig_time.minutes(), orig_time.seconds());
+  // Fractional seconds are nanoseconds because Boost is configured to use nanoseconds
+  // precision.
+  time_duration fraction = milliseconds(orig_time.fractional_seconds() / 1000000);
+  new_time = new_time + fraction;
+  return TimestampValue(orig_date, new_time);
+}
 
-  // resolve trunc_unit using the prepared state if possible, o.w. parse now
-  // TruncPrepare() can only parse trunc_unit if user passes it as a string literal
-  // TODO: it would be nice to resolve the branch before codegen so we can optimise
-  // this better.
-  TruncUnit::Type trunc_unit;
-  void* state = context->GetFunctionState(FunctionContext::THREAD_LOCAL);
-  if (state != NULL) {
-    trunc_unit = *reinterpret_cast<TruncUnit::Type*>(state);
-  } else {
-    trunc_unit = StrToTruncUnit(context, unit_str);
-    if (trunc_unit == TruncUnit::UNIT_INVALID) {
-      string string_unit(reinterpret_cast<char*>(unit_str.ptr), unit_str.len);
-      context->SetError(Substitute("Invalid Truncate Unit: $0", string_unit).c_str());
-      return TimestampVal::null();
-    }
-  }
+// Truncate parts of microseconds
+TimestampValue TruncMicroSeconds(const date& orig_date, const time_duration& orig_time) {
+  time_duration new_time(orig_time.hours(), orig_time.minutes(), orig_time.seconds());
+  // Fractional seconds are nanoseconds because Boost is configured to use nanoseconds
+  // precision.
+  time_duration fraction = microseconds(orig_time.fractional_seconds() / 1000);
+  new_time = new_time + fraction;
+  return TimestampValue(orig_date, new_time);
+}
 
+// used by both Trunc and DateTrunc functions to perform the truncation
+TimestampVal DoTrunc(
+    const TimestampValue ts, TruncUnit trunc_unit, FunctionContext* context) {
+  const date& orig_date = ts.date();
+  const time_duration& orig_time = ts.time();
   TimestampValue ret;
   TimestampVal ret_val;
 
   // check for invalid or malformed timestamps
   switch (trunc_unit) {
+    case TruncUnit::MILLENNIUM:
+      // for millenium < 2000 year value goes to 1000 (outside the supported range)
+      if (orig_date.is_special()) return TimestampVal::null();
+      if (orig_date.year() < 2000) return TimestampVal::null();
+      break;
+    case TruncUnit::WEEK:
+      // anything less than 1400-1-6 we have to move to year 1399
+      if (orig_date.is_special()) return TimestampVal::null();
+      if (orig_date < date(1400, 1, 6)) return TimestampVal::null();
+      break;
     case TruncUnit::YEAR:
     case TruncUnit::QUARTER:
     case TruncUnit::MONTH:
@@ -198,10 +286,15 @@ TimestampVal UdfBuiltins::TruncImpl(FunctionContext* context, const TimestampVal
     case TruncUnit::W:
     case TruncUnit::DAY:
     case TruncUnit::DAY_OF_WEEK:
+    case TruncUnit::CENTURY:
+    case TruncUnit::DECADE:
       if (orig_date.is_special()) return TimestampVal::null();
       break;
     case TruncUnit::HOUR:
     case TruncUnit::MINUTE:
+    case TruncUnit::SECOND:
+    case TruncUnit::MILLISECONDS:
+    case TruncUnit::MICROSECONDS:
       if (orig_time.is_special()) return TimestampVal::null();
       break;
     case TruncUnit::UNIT_INVALID:
@@ -236,27 +329,73 @@ TimestampVal UdfBuiltins::TruncImpl(FunctionContext* context, const TimestampVal
     case TruncUnit::MINUTE:
       ret = TruncMinute(orig_date, orig_time);
       break;
+    case TruncUnit::MILLENNIUM:
+      ret = TruncMillenium(orig_date);
+      break;
+    case TruncUnit::CENTURY:
+      ret = TruncCentury(orig_date);
+      break;
+    case TruncUnit::DECADE:
+      ret = TruncDecade(orig_date);
+      break;
+    case TruncUnit::WEEK:
+      ret = TruncWeek(orig_date);
+      break;
+    case TruncUnit::SECOND:
+      ret = TruncSecond(orig_date, orig_time);
+      break;
+    case TruncUnit::MILLISECONDS:
+      ret = TruncMilliSeconds(orig_date, orig_time);
+      break;
+    case TruncUnit::MICROSECONDS:
+      ret = TruncMicroSeconds(orig_date, orig_time);
+      break;
     default:
       // internal error: implies StrToTruncUnit out of sync with this switch
-      context->SetError(Substitute("truncate unit $0 not supported", trunc_unit).c_str());
+      context->SetError("truncate unit not supported");
       return TimestampVal::null();
   }
 
   ret.ToTimestampVal(&ret_val);
   return ret_val;
 }
+}
 
-void UdfBuiltins::TruncPrepare(FunctionContext* ctx,
-    FunctionContext::FunctionStateScope scope) {
+TimestampVal UdfBuiltins::TruncImpl(
+    FunctionContext* context, const TimestampVal& tv, const StringVal& unit_str) {
+  if (tv.is_null) return TimestampVal::null();
+  const TimestampValue& ts = TimestampValue::FromTimestampVal(tv);
+
+  // resolve trunc_unit using the prepared state if possible, o.w. parse now
+  // TruncPrepare() can only parse trunc_unit if user passes it as a string literal
+  // TODO: it would be nice to resolve the branch before codegen so we can optimise
+  // this better.
+  TruncUnit trunc_unit;
+  void* state = context->GetFunctionState(FunctionContext::THREAD_LOCAL);
+  if (state != NULL) {
+    trunc_unit = *reinterpret_cast<TruncUnit*>(state);
+  } else {
+    trunc_unit = StrToTruncUnit(context, unit_str);
+    if (trunc_unit == TruncUnit::UNIT_INVALID) {
+      string string_unit(reinterpret_cast<char*>(unit_str.ptr), unit_str.len);
+      context->SetError(Substitute("Invalid Truncate Unit: $0", string_unit).c_str());
+      return TimestampVal::null();
+    }
+  }
+  return DoTrunc(ts, trunc_unit, context);
+}
+
+void UdfBuiltins::TruncPrepare(
+    FunctionContext* ctx, FunctionContext::FunctionStateScope scope) {
   // Parse the unit up front if we can, otherwise do it on the fly in Trunc()
   if (ctx->IsArgConstant(1)) {
     StringVal* unit_str = reinterpret_cast<StringVal*>(ctx->GetConstantArg(1));
-    TruncUnit::Type trunc_unit = StrToTruncUnit(ctx, *unit_str);
+    TruncUnit trunc_unit = StrToTruncUnit(ctx, *unit_str);
     if (trunc_unit == TruncUnit::UNIT_INVALID) {
       string string_unit(reinterpret_cast<char*>(unit_str->ptr), unit_str->len);
       ctx->SetError(Substitute("Invalid Truncate Unit: $0", string_unit).c_str());
     } else {
-      TruncUnit::Type* state = ctx->Allocate<TruncUnit::Type>();
+      TruncUnit* state = ctx->Allocate<TruncUnit>();
       RETURN_IF_NULL(ctx, state);
       *state = trunc_unit;
       ctx->SetFunctionState(scope, state);
@@ -271,3 +410,50 @@ void UdfBuiltins::TruncClose(FunctionContext* ctx,
   ctx->SetFunctionState(scope, nullptr);
 }
 
+TimestampVal UdfBuiltins::DateTruncImpl(
+    FunctionContext* context, const TimestampVal& tv, const StringVal& unit_str) {
+  if (tv.is_null) return TimestampVal::null();
+  const TimestampValue& ts = TimestampValue::FromTimestampVal(tv);
+
+  // resolve date_trunc_unit using the prepared state if possible, o.w. parse now
+  // DateTruncPrepare() can only parse trunc_unit if user passes it as a string literal
+  TruncUnit date_trunc_unit;
+  void* state = context->GetFunctionState(FunctionContext::THREAD_LOCAL);
+  if (state != NULL) {
+    date_trunc_unit = *reinterpret_cast<TruncUnit*>(state);
+  } else {
+    date_trunc_unit = StrToDateTruncUnit(context, unit_str);
+    if (date_trunc_unit == TruncUnit::UNIT_INVALID) {
+      string string_unit(reinterpret_cast<char*>(unit_str.ptr), unit_str.len);
+      context->SetError(
+          Substitute("Invalid Date Truncate Unit: $0", string_unit).c_str());
+      return TimestampVal::null();
+    }
+  }
+  return DoTrunc(ts, date_trunc_unit, context);
+}
+
+void UdfBuiltins::DateTruncPrepare(
+    FunctionContext* ctx, FunctionContext::FunctionStateScope scope) {
+  // Parse the unit up front if we can, otherwise do it on the fly in DateTrunc()
+  if (ctx->IsArgConstant(0)) {
+    StringVal* unit_str = reinterpret_cast<StringVal*>(ctx->GetConstantArg(0));
+    TruncUnit date_trunc_unit = StrToDateTruncUnit(ctx, *unit_str);
+    if (date_trunc_unit == TruncUnit::UNIT_INVALID) {
+      string string_unit(reinterpret_cast<char*>(unit_str->ptr), unit_str->len);
+      ctx->SetError(Substitute("Invalid Date Truncate Unit: $0", string_unit).c_str());
+    } else {
+      TruncUnit* state = ctx->Allocate<TruncUnit>();
+      RETURN_IF_NULL(ctx, state);
+      *state = date_trunc_unit;
+      ctx->SetFunctionState(scope, state);
+    }
+  }
+}
+
+void UdfBuiltins::DateTruncClose(
+    FunctionContext* ctx, FunctionContext::FunctionStateScope scope) {
+  void* state = ctx->GetFunctionState(scope);
+  ctx->Free(reinterpret_cast<uint8_t*>(state));
+  ctx->SetFunctionState(scope, nullptr);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f538b439/be/src/exprs/udf-builtins.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/udf-builtins.h b/be/src/exprs/udf-builtins.h
index 2ef439b..4eeb33a 100644
--- a/be/src/exprs/udf-builtins.h
+++ b/be/src/exprs/udf-builtins.h
@@ -79,8 +79,36 @@ class UdfBuiltins {
       const StringVal& unit_str);
   static void TruncPrepare(FunctionContext* context,
       FunctionContext::FunctionStateScope scope);
-  static void TruncClose(FunctionContext* context,
-      FunctionContext::FunctionStateScope scope);
+  static void TruncClose(
+      FunctionContext* context, FunctionContext::FunctionStateScope scope);
+
+  /// Rounds (truncating down) a Timestamp to the specified unit.
+  ///    Units:
+  ///    MILLENNIUM: The millennium number.
+  ///    CENTURY: The century number.
+  ///    DECADE: The year field divided by 10.
+  ///    YEAR: The year field (1400 - 9999).
+  ///    MONTH: The number of the month within the year (1–12)
+  ///    WEEK: The number of the week of the year that the day is in.
+  ///    DAY: The day (of the month) field (1–31).
+  ///    HOUR: The hour field (0–23).
+  ///    MINUTE: The minutes field (0–59).
+  ///    SECOND: The seconds field (0–59).
+  ///    MILLISECONDS: The milliseconds fraction in the seconds.
+  ///    MICROSECONDS: The microseconds fraction in the seconds.
+
+  ///    Reference:
+  ///    https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/
+  ///       SQLReferenceManual/Functions/Date-Time/DATE_TRUNC.htm
+  static TimestampVal DateTrunc(
+      FunctionContext* context, const StringVal& unit_str, const TimestampVal& date);
+  /// Implementation of DateTrunc, not cross-compiled.
+  static TimestampVal DateTruncImpl(
+      FunctionContext* context, const TimestampVal& date, const StringVal& unit_str);
+  static void DateTruncPrepare(
+      FunctionContext* context, FunctionContext::FunctionStateScope scope);
+  static void DateTruncClose(
+      FunctionContext* context, FunctionContext::FunctionStateScope scope);
 
   /// Returns a single field from a timestamp
   ///    Fields:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f538b439/common/function-registry/impala_functions.py
----------------------------------------------------------------------
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index f43c7c1..0e469e6 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -124,6 +124,10 @@ visible_functions = [
   [['millisecond'], 'INT', ['TIMESTAMP'], '_ZN6impala18TimestampFunctions11MillisecondEPN10impala_udf15FunctionContextERKNS1_12TimestampValE'],
   [['to_date'], 'STRING', ['TIMESTAMP'], '_ZN6impala18TimestampFunctions6ToDateEPN10impala_udf15FunctionContextERKNS1_12TimestampValE'],
   [['dayname'], 'STRING', ['TIMESTAMP'], '_ZN6impala18TimestampFunctions7DayNameEPN10impala_udf15FunctionContextERKNS1_12TimestampValE'],
+  [['date_trunc'], 'TIMESTAMP', ['STRING', 'TIMESTAMP'],
+   '_ZN6impala11UdfBuiltins9DateTruncEPN10impala_udf15FunctionContextERKNS1_9StringValERKNS1_12TimestampValE',
+   '_ZN6impala11UdfBuiltins16DateTruncPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
+   '_ZN6impala11UdfBuiltins14DateTruncCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
   [['years_add'], 'TIMESTAMP', ['TIMESTAMP', 'INT'],
       '_ZN6impala18TimestampFunctions6AddSubILb1EN10impala_udf6IntValEN5boost9date_time14years_durationINS4_9gregorian21greg_durations_configEEELb0EEENS2_12TimestampValEPNS2_15FunctionContextERKSA_RKT0_'],
   [['years_add'], 'TIMESTAMP', ['TIMESTAMP', 'BIGINT'],