You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by cs...@apache.org on 2021/11/09 16:13:57 UTC
[impala] branch master updated: IMPALA-10984: Improve
TimestampValue to String casting
This is an automated email from the ASF dual-hosted git repository.
csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new df42225 IMPALA-10984: Improve TimestampValue to String casting
df42225 is described below
commit df42225f5c8f65a66192b47612d17259d1b2dc6c
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Mon Oct 25 14:10:57 2021 -0700
IMPALA-10984: Improve TimestampValue to String casting
TimestampValue::ToString was implemented by concatenating
boost::gregorian::to_iso_extended_string and
boost::posix_time::to_simple_string using stringstream. This involves
multiple string allocations and copies, and might hit a lock within
tcmalloc::CentralFreeList. FROM_UNIXTIME and CAST expressions that
touch this function can be inefficient if the expression is being
evaluated for millions of rows.
This patch adds method TimestampValue::ToStringVal and reimplements
TimestampValue::ToString by supplying default DateTimeFormatContext if
no pattern was specified. "yyyy-MM-dd HH:mm:ss" will be picked as the
default format if the time_ component does not have fractional seconds.
Otherwise, "yyyy-MM-dd HH:mm:ss.SSSSSSSSS" will be picked as the default
format. The chosen DateTimeFormatContext then is passed to
TimestampParser::Format along with date_ and time_ to be formatted into
the string representation. The int-to-string conversion in
TimestampParser::Format is replaced with FastInt32ToBufferLeft.
We ran a set of expression benchmarks on a machine with an Intel(R)
Core(TM) i7-4790 CPU @ 3.60GHz. This patch gives > 10X performance
improvement for CAST timestamp to string and FROM_UNIXTIME without a
date-time pattern. Following are the detailed results before and after
the patch.
Before the patch:
FromUnixCodegen: Function 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
(relative) (relative) (relative)
---------------------------------------------------------------------------------------------------
literal 36.7 37 37.3 1X 1X 1X
cast(now() as string) 2.31 2.31 2.33 0.0628X 0.0623X 0.0626X
cast(now() as string format 'Y .SSSSS') 16.9 17.5 17.5 0.459X 0.472X 0.471X
from_unixtime(0,'yyyy-MM-dd HH:mm:ss') 6.3 6.3 6.37 0.171X 0.17X 0.171X
from_unixtime(0,'yyyy-MM-dd') 11.8 11.8 12 0.32X 0.32X 0.322X
from_unixtime(0) 2.36 2.4 2.4 0.0644X 0.0648X 0.0644X
After the patch:
FromUnixCodegen: Function 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
(relative) (relative) (relative)
---------------------------------------------------------------------------------------------------
literal 37.7 38.1 38.4 1X 1X 1X
cast(now() as string) 29.9 30.1 30.2 0.794X 0.79X 0.787X
cast(now() as string format 'Y .SSSSS') 61.1 61.3 61.6 1.62X 1.61X 1.61X
from_unixtime(0,'yyyy-MM-dd HH:mm:ss') 33.6 33.8 34.2 0.892X 0.887X 0.892X
from_unixtime(0,'yyyy-MM-dd') 50.5 50.6 50.9 1.34X 1.33X 1.33X
from_unixtime(0) 34 34.2 34.5 0.902X 0.896X 0.898X
The literal expression used as the baseline in this benchmark is
"cast('2012-01-01 09:10:11.123456789' as timestamp)".
This patch also updates the numbers in expr-benchmark for
BenchmarkTimestampFunctions and tidies up expr-benchmark a bit to clear
its MemPool in between benchmark iterations so that it does not run out
of memory.
Testing:
- Pass core tests.
Change-Id: I4fcb4545d9c9a3fdb38c4db58bb4b1321a429d61
Reviewed-on: http://gerrit.cloudera.org:8080/17980
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Csaba Ringhofer <cs...@cloudera.com>
Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
---
be/src/benchmarks/expr-benchmark.cc | 147 +++++++++++----------
be/src/exprs/aggregate-functions-ir.cc | 2 +-
be/src/exprs/cast-functions-ir.cc | 14 +-
be/src/exprs/literal.cc | 2 +-
be/src/exprs/timestamp-functions-ir.cc | 9 +-
be/src/exprs/timestamp-functions.cc | 1 -
be/src/runtime/date-parse-util.cc | 6 +-
.../runtime/datetime-iso-sql-format-tokenizer.cc | 15 ++-
be/src/runtime/datetime-iso-sql-format-tokenizer.h | 3 +
be/src/runtime/datetime-parser-common.cc | 5 +-
be/src/runtime/datetime-parser-common.h | 21 ++-
.../runtime/datetime-simple-date-format-parser.cc | 8 +-
.../runtime/datetime-simple-date-format-parser.h | 11 ++
be/src/runtime/timestamp-parse-util.cc | 54 ++++----
be/src/runtime/timestamp-parse-util.h | 26 +++-
be/src/runtime/timestamp-test.cc | 12 +-
be/src/runtime/timestamp-value.cc | 34 +++--
be/src/runtime/timestamp-value.h | 19 ++-
be/src/runtime/timestamp-value.inline.h | 29 +++-
be/src/util/min-max-filter.cc | 2 +-
20 files changed, 261 insertions(+), 159 deletions(-)
diff --git a/be/src/benchmarks/expr-benchmark.cc b/be/src/benchmarks/expr-benchmark.cc
index 0ece02f..d850a57 100644
--- a/be/src/benchmarks/expr-benchmark.cc
+++ b/be/src/benchmarks/expr-benchmark.cc
@@ -79,7 +79,7 @@ struct ExprTestData {
// error handling.
class Planner {
public:
- Planner() : mem_pool_(&tracker_) {
+ Planner() : expr_perm_pool_(&tracker_), expr_results_pool_(&tracker_) {
frontend_.SetCatalogIsReady();
ABORT_IF_ERROR(exec_env_.InitForFeSupport());
query_options_.enable_expr_rewrites = false;
@@ -134,8 +134,8 @@ class Planner {
RETURN_IF_ERROR(
ScalarExpr::Create(texprs[0], RowDescriptor(), fragment_state, &expr));
ScalarExprEvaluator* eval;
- RETURN_IF_ERROR(
- ScalarExprEvaluator::Create(*expr, state, &pool_, &mem_pool_, &mem_pool_, &eval));
+ RETURN_IF_ERROR(ScalarExprEvaluator::Create(
+ *expr, state, state->obj_pool(), &expr_perm_pool_, &expr_results_pool_, &eval));
// UDFs which cannot be interpreted need to be handled by codegen.
// This follow examples from fe-support.cc
@@ -153,6 +153,10 @@ class Planner {
return Status::OK();
}
+ void ClearExprPermPool() { expr_perm_pool_.Clear(); }
+
+ void ClearExprResultsPool() { expr_results_pool_.Clear(); }
+
private:
Frontend frontend_;
ExecEnv exec_env_;
@@ -162,7 +166,8 @@ class Planner {
ObjectPool pool_;
MemTracker tracker_;
- MemPool mem_pool_;
+ MemPool expr_perm_pool_;
+ MemPool expr_results_pool_;
};
Planner* planner;
@@ -201,6 +206,10 @@ void BenchmarkQueryFn(int batch_size, void* d) {
}
}
+void SetupBenchmark(void* d) {
+ planner->ClearExprResultsPool();
+}
+
#define BENCHMARK(name, stmt) \
suite->AddBenchmark(name, BenchmarkQueryFn, GenerateBenchmarkExprs(stmt))
// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
@@ -764,72 +773,74 @@ Benchmark* BenchmarkMathFunctions(bool codegen) {
// TimestampFn: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// literal 34.4 34.6 34.8 1X 1X 1X
-// to_string 2.09 2.12 2.12 0.0609X 0.0611X 0.0607X
-// add_year 16.7 16.7 16.8 0.485X 0.481X 0.482X
-// sub_month 16.5 16.5 16.6 0.48X 0.476X 0.477X
-// add_weeks 20.9 20.9 21.1 0.609X 0.604X 0.605X
-// sub_days 19.7 19.7 19.8 0.574X 0.57X 0.569X
-// add 20.7 20.9 21.1 0.604X 0.604X 0.605X
-// sub_hours 19.4 19.5 19.6 0.563X 0.564X 0.563X
-// add_minutes 19.9 19.9 20 0.58X 0.575X 0.575X
-// sub_seconds 19.9 19.9 20 0.58X 0.575X 0.574X
-// add_milli 18.3 18.3 18.5 0.534X 0.529X 0.53X
-// sub_micro 18.3 18.3 18.5 0.534X 0.529X 0.53X
-// add_nano 18.7 18.9 19 0.544X 0.545X 0.546X
-// unix_timestamp1 38.7 38.9 39.1 1.13X 1.12X 1.12X
-// unix_timestamp2 53 53.3 53.5 1.54X 1.54X 1.54X
-// from_unix1 6.11 6.11 6.19 0.178X 0.176X 0.178X
-// from_unix2 10.7 10.9 11.2 0.312X 0.316X 0.321X
-// year 38.9 39.1 39.3 1.13X 1.13X 1.13X
-// month 39.1 39.3 39.5 1.14X 1.13X 1.13X
-// day of month 38.7 38.7 38.9 1.13X 1.12X 1.12X
-// day of year 34.6 34.8 34.8 1.01X 1.01X 1X
-// week of year 33.3 33.6 33.8 0.97X 0.971X 0.97X
-// hour 76.7 77.4 78.4 2.23X 2.24X 2.25X
-// minute 76.5 77 78.1 2.23X 2.22X 2.24X
-// second 74.6 75.4 76.4 2.17X 2.18X 2.19X
-// to date 19 19.1 19.2 0.554X 0.551X 0.552X
-// date diff 17.5 17.5 17.6 0.509X 0.505X 0.505X
-// from utc 22.1 22.4 22.4 0.644X 0.646X 0.644X
-// to utc 18.9 18.9 19 0.55X 0.545X 0.546X
-// now 290 293 295 8.44X 8.47X 8.48X
-// unix_timestamp 209 211 213 6.09X 6.08X 6.11X
+// literal 33.8 34 34.1 1X 1X 1X
+// to_string 13.3 13.4 13.5 0.395X 0.395X 0.395X
+// add_year 16.5 16.7 16.8 0.488X 0.49X 0.492X
+// sub_month 16.3 16.3 16.4 0.482X 0.48X 0.481X
+// add_weeks 20.4 20.4 20.5 0.603X 0.599X 0.601X
+// sub_days 19.4 19.4 19.5 0.573X 0.569X 0.572X
+// add 20.6 20.6 20.7 0.608X 0.605X 0.608X
+// sub_hours 18.7 18.7 18.9 0.553X 0.55X 0.553X
+// add_minutes 19.4 19.5 19.6 0.573X 0.575X 0.575X
+// sub_seconds 19.1 19.4 19.4 0.564X 0.569X 0.569X
+// add_milli 18.5 18.5 18.7 0.548X 0.545X 0.546X
+// sub_micro 17.9 17.9 18.1 0.529X 0.526X 0.529X
+// add_nano 18.3 18.3 18.5 0.542X 0.54X 0.541X
+// unix_timestamp1 37.9 37.9 38.1 1.12X 1.11X 1.12X
+// unix_timestamp2 51.9 52.2 52.5 1.54X 1.54X 1.54X
+// from_unix1 30.4 30.7 30.9 0.899X 0.902X 0.904X
+// from_unix2 43.3 43.5 44 1.28X 1.28X 1.29X
+// from_unix3 30.7 31.1 31.3 0.91X 0.916X 0.917X
+// year 39.3 39.4 39.7 1.16X 1.16X 1.16X
+// month 39.5 40.1 40.3 1.17X 1.18X 1.18X
+// day of month 38.2 38.4 38.5 1.13X 1.13X 1.13X
+// day of year 35.4 35.6 35.8 1.05X 1.05X 1.05X
+// week of year 34.6 34.6 34.8 1.02X 1.02X 1.02X
+// hour 81.5 81.9 82.5 2.41X 2.41X 2.42X
+// minute 80 80.5 81.5 2.37X 2.37X 2.39X
+// second 81.2 82.2 82.9 2.4X 2.42X 2.43X
+// to date 19.4 19.4 19.5 0.573X 0.569X 0.57X
+// date diff 17.5 17.5 17.6 0.518X 0.515X 0.514X
+// from utc 21.9 21.9 22.2 0.649X 0.646X 0.649X
+// to utc 19.4 19.4 19.4 0.573X 0.569X 0.569X
+// now 286 287 290 8.45X 8.45X 8.5X
+// unix_timestamp 207 208 211 6.13X 6.13X 6.17X
//
// TimestampFnCodegen: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// literal 37.2 37.4 37.5 1X 1X 1X
-// to_string 2.14 2.16 2.16 0.0576X 0.0577X 0.0575X
-// add_year 18.5 18.5 18.7 0.498X 0.495X 0.497X
-// sub_month 18.3 18.3 18.5 0.493X 0.49X 0.492X
-// add_weeks 23.9 24 24.1 0.641X 0.641X 0.641X
-// sub_days 22.1 22.1 22.3 0.595X 0.592X 0.593X
-// add 23.6 23.6 23.8 0.634X 0.631X 0.634X
-// sub_hours 21.9 21.9 22 0.59X 0.587X 0.587X
-// add_minutes 22.4 22.6 22.7 0.602X 0.604X 0.606X
-// sub_seconds 22.4 22.4 22.6 0.602X 0.599X 0.602X
-// add_milli 20.6 20.6 20.7 0.552X 0.55X 0.552X
-// sub_micro 20.6 20.7 20.9 0.552X 0.554X 0.556X
-// add_nano 21.1 21.3 21.4 0.567X 0.568X 0.57X
-// unix_timestamp1 46.9 46.9 47.1 1.26X 1.25X 1.25X
-// unix_timestamp2 64.2 64.4 64.7 1.72X 1.72X 1.72X
-// from_unix1 6.3 6.3 6.37 0.169X 0.168X 0.17X
-// from_unix2 11.3 11.6 11.8 0.305X 0.309X 0.315X
-// year 59.3 59.4 59.8 1.59X 1.59X 1.59X
-// month 59.3 59.4 59.7 1.59X 1.59X 1.59X
-// day of month 58.8 59.3 59.6 1.58X 1.58X 1.59X
-// day of year 53.4 53.5 53.8 1.44X 1.43X 1.43X
-// week of year 50.4 50.6 51.2 1.35X 1.35X 1.36X
-// hour 126 127 129 3.38X 3.4X 3.44X
-// minute 126 126 129 3.38X 3.38X 3.43X
-// second 126 127 128 3.38X 3.4X 3.42X
-// to date 23.1 23.1 23.4 0.622X 0.619X 0.623X
-// date diff 22.4 22.4 22.6 0.602X 0.599X 0.601X
-// from utc 38.9 39.4 39.6 1.05X 1.05X 1.06X
-// to utc 21.8 21.8 21.9 0.585X 0.582X 0.582X
-// now 517 522 527 13.9X 14X 14X
-// unix_timestamp 378 381 384 10.2X 10.2X 10.2X
+// literal 38.2 38.4 38.6 1X 1X 1X
+// to_string 15 15.3 15.3 0.392X 0.398X 0.396X
+// add_year 18.3 18.3 18.5 0.479X 0.477X 0.478X
+// sub_month 17.9 18.1 18.1 0.469X 0.47X 0.47X
+// add_weeks 23.8 23.8 23.9 0.622X 0.619X 0.619X
+// sub_days 22.6 22.6 22.7 0.59X 0.588X 0.589X
+// add 23.4 23.4 23.5 0.613X 0.61X 0.609X
+// sub_hours 21.8 21.8 21.9 0.569X 0.566X 0.566X
+// add_minutes 21.9 21.9 22 0.574X 0.571X 0.57X
+// sub_seconds 21.9 21.9 22 0.574X 0.571X 0.57X
+// add_milli 20.9 20.9 21.1 0.547X 0.545X 0.547X
+// sub_micro 20.1 20.1 20.2 0.525X 0.523X 0.523X
+// add_nano 21.1 21.1 21.3 0.552X 0.549X 0.551X
+// unix_timestamp1 47 47.2 47.5 1.23X 1.23X 1.23X
+// unix_timestamp2 61.3 61.5 61.8 1.6X 1.6X 1.6X
+// from_unix1 34.8 35 35.3 0.91X 0.911X 0.914X
+// from_unix2 51.5 51.5 52 1.35X 1.34X 1.35X
+// from_unix3 34.8 35 35.3 0.91X 0.911X 0.914X
+// year 57.4 57.8 58.5 1.5X 1.5X 1.51X
+// month 58.4 58.6 59.1 1.53X 1.53X 1.53X
+// day of month 58.9 59.1 59.4 1.54X 1.54X 1.54X
+// day of year 53.2 53.7 54.3 1.39X 1.4X 1.41X
+// week of year 50.9 51.1 51.4 1.33X 1.33X 1.33X
+// hour 125 132 134 3.26X 3.43X 3.48X
+// minute 132 133 134 3.46X 3.46X 3.48X
+// second 132 133 135 3.46X 3.47X 3.49X
+// to date 24.2 24.3 24.6 0.632X 0.631X 0.637X
+// date diff 22.6 22.6 22.7 0.591X 0.588X 0.589X
+// from utc 38.1 38.5 38.8 0.995X 1X 1X
+// to utc 22.1 22.1 22.4 0.579X 0.576X 0.58X
+// now 517 520 524 13.5X 13.5X 13.6X
+// unix_timestamp 399 403 406 10.4X 10.5X 10.5X
Benchmark* BenchmarkTimestampFunctions(bool codegen) {
Benchmark* suite = new Benchmark(BenchmarkName("TimestampFn", codegen));
BENCHMARK("literal", "cast('2012-01-01 09:10:11.123456789' as timestamp)");
@@ -863,6 +874,7 @@ Benchmark* BenchmarkTimestampFunctions(bool codegen) {
"unix_timestamp('1970-10-01', 'yyyy-MM-dd')");
BENCHMARK("from_unix1", "from_unixtime(0, 'yyyy-MM-dd HH:mm:ss')");
BENCHMARK("from_unix2", "from_unixtime(0, 'yyyy-MM-dd')");
+ BENCHMARK("from_unix3", "from_unixtime(0)");
BENCHMARK("year", "year(cast('2011-12-22' as timestamp))");
BENCHMARK("month", "month(cast('2011-12-22' as timestamp))");
BENCHMARK("day of month", "dayofmonth(cast('2011-12-22' as timestamp))");
@@ -919,9 +931,10 @@ int main(int argc, char** argv) {
cout << Benchmark::GetMachineInfo() << endl;
for (auto& benchmark : benchmarks) {
for (int codegen = 0; codegen <= 1; codegen++) {
+ planner->ClearExprPermPool();
planner->EnableCodegen(codegen);
Benchmark* suite = (*benchmark)(codegen);
- cout << suite->Measure() << endl;
+ cout << suite->Measure(50, 10, SetupBenchmark) << endl;
}
}
diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc
index 50fc26b..45998e4 100644
--- a/be/src/exprs/aggregate-functions-ir.cc
+++ b/be/src/exprs/aggregate-functions-ir.cc
@@ -1384,7 +1384,7 @@ void PrintSample(const ReservoirSample<DecimalVal>& v, ostream* os) {
template <>
void PrintSample(const ReservoirSample<TimestampVal>& v, ostream* os) {
- *os << TimestampValue::FromTimestampVal(v.val).ToString();
+ *os << TimestampValue::FromTimestampVal(v.val);
}
template <>
diff --git a/be/src/exprs/cast-functions-ir.cc b/be/src/exprs/cast-functions-ir.cc
index a37f068..0835ae5 100644
--- a/be/src/exprs/cast-functions-ir.cc
+++ b/be/src/exprs/cast-functions-ir.cc
@@ -21,20 +21,16 @@
#include <sstream>
#include <string>
-#include <boost/date_time/gregorian/gregorian.hpp>
-#include <boost/date_time/posix_time/posix_time.hpp>
#include <gutil/strings/numbers.h>
#include <gutil/strings/substitute.h>
#include "exprs/anyval-util.h"
#include "exprs/cast-format-expr.h"
#include "exprs/decimal-functions.h"
-#include "runtime/runtime-state.h"
#include "runtime/string-value.inline.h"
#include "runtime/timestamp-value.h"
#include "runtime/timestamp-value.inline.h"
#include "util/string-parser.h"
-#include "string-functions.h"
#include "common/names.h"
@@ -192,14 +188,8 @@ StringVal CastFunctions::CastToStringVal(FunctionContext* ctx, const TimestampVa
const DateTimeFormatContext* format_ctx =
reinterpret_cast<const DateTimeFormatContext*>(
ctx->GetFunctionState(FunctionContext::FRAGMENT_LOCAL));
- StringVal sv;
- if (format_ctx == nullptr) {
- sv = AnyValUtil::FromString(ctx, tv.ToString());
- } else {
- string formatted_timestamp = tv.Format(*format_ctx);
- if (formatted_timestamp.empty()) return StringVal::null();
- sv = AnyValUtil::FromString(ctx, formatted_timestamp);
- }
+ StringVal sv =
+ (format_ctx == nullptr) ? tv.ToStringVal(ctx) : tv.ToStringVal(ctx, *format_ctx);
AnyValUtil::TruncateIfNecessary(ctx->GetReturnType(), &sv);
return sv;
}
diff --git a/be/src/exprs/literal.cc b/be/src/exprs/literal.cc
index ed38899..63efb96 100644
--- a/be/src/exprs/literal.cc
+++ b/be/src/exprs/literal.cc
@@ -354,7 +354,7 @@ string Literal::DebugString() const {
}
break;
case TYPE_TIMESTAMP:
- out << value_.timestamp_val.ToString();
+ out << value_.timestamp_val;
break;
case TYPE_DATE:
out << value_.date_val;
diff --git a/be/src/exprs/timestamp-functions-ir.cc b/be/src/exprs/timestamp-functions-ir.cc
index 12e16c7..1cc06e1 100644
--- a/be/src/exprs/timestamp-functions-ir.cc
+++ b/be/src/exprs/timestamp-functions-ir.cc
@@ -20,7 +20,6 @@
#include <ctime>
#include <iomanip>
-#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/date_time/gregorian/gregorian_types.hpp>
#include <gutil/strings/substitute.h>
@@ -71,10 +70,7 @@ StringVal TimestampFunctions::StringValFromTimestamp(FunctionContext* context,
}
}
- string formatted_timestamp = tv.Format(*dt_ctx);
- if (formatted_timestamp.empty()) return StringVal::null();
- StringVal result = AnyValUtil::FromString(context, formatted_timestamp);
- return result;
+ return tv.ToStringVal(context, *dt_ctx);
}
template <class TIME>
@@ -82,8 +78,7 @@ StringVal TimestampFunctions::FromUnix(FunctionContext* context, const TIME& int
if (intp.is_null) return StringVal::null();
const TimestampValue tv = TimestampValue::FromUnixTime(intp.val,
context->impl()->state()->time_zone_for_unix_time_conversions());
- if (!tv.HasDateAndTime()) return StringVal::null();
- return AnyValUtil::FromString(context, tv.ToString());
+ return tv.ToStringVal(context);
}
template <class TIME>
diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc
index cafc26e..e92e8a4 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -24,7 +24,6 @@
#include "runtime/datetime-simple-date-format-parser.h"
#include "runtime/string-value.inline.h"
#include "runtime/timestamp-value.h"
-#include "runtime/timestamp-value.inline.h"
#include "udf/udf-internal.h"
#include "udf/udf.h"
diff --git a/be/src/runtime/date-parse-util.cc b/be/src/runtime/date-parse-util.cc
index 8a37f62..8db8def 100644
--- a/be/src/runtime/date-parse-util.cc
+++ b/be/src/runtime/date-parse-util.cc
@@ -17,8 +17,6 @@
#include "runtime/date-parse-util.h"
-#include <boost/date_time/gregorian/gregorian.hpp>
-
#include "cctz/civil_time.h"
#include "runtime/datetime-iso-sql-format-parser.h"
#include "runtime/datetime-simple-date-format-parser.h"
@@ -145,7 +143,7 @@ string DateParser::Format(const DateTimeFormatContext& dt_ctx, const DateValue&
switch (tok.type) {
case YEAR:
case ROUND_YEAR: {
- num_val = AdjustYearToLength(year, tok.len);
+ num_val = AdjustYearToLength(year, tok.divisor);
break;
}
case QUARTER_OF_YEAR: {
@@ -189,7 +187,7 @@ string DateParser::Format(const DateTimeFormatContext& dt_ctx, const DateValue&
break;
}
case ISO8601_WEEK_NUMBERING_YEAR: {
- num_val = AdjustYearToLength(date.Iso8601WeekNumberingYear(), tok.len);
+ num_val = AdjustYearToLength(date.Iso8601WeekNumberingYear(), tok.divisor);
break;
}
case ISO8601_WEEK_OF_YEAR: {
diff --git a/be/src/runtime/datetime-iso-sql-format-tokenizer.cc b/be/src/runtime/datetime-iso-sql-format-tokenizer.cc
index 4c24960..6f074eb 100644
--- a/be/src/runtime/datetime-iso-sql-format-tokenizer.cc
+++ b/be/src/runtime/datetime-iso-sql-format-tokenizer.cc
@@ -20,7 +20,6 @@
#include <strings.h>
#include <algorithm>
#include <cstring>
-#include <utility>
#include <vector>
#include <boost/algorithm/string/case_conv.hpp>
@@ -99,6 +98,8 @@ const unsigned IsoSqlFormatTokenizer::MAX_TOKEN_SIZE = 5;
const int IsoSqlFormatTokenizer::MAX_FORMAT_LENGTH = 100;
+const int IsoSqlFormatTokenizer::FRACTIONAL_MAX_LEN = 9;
+
FormatTokenizationResult IsoSqlFormatTokenizer::Tokenize() {
DCHECK(dt_ctx_ != nullptr);
DCHECK(dt_ctx_->fmt != nullptr);
@@ -156,8 +157,16 @@ FormatTokenizationResult IsoSqlFormatTokenizer::ProcessNextToken(
}
if (cast_mode_ == PARSE && IsUsedToken(token_to_probe)) return DUPLICATE_FORMAT;
if (!accept_time_toks_ && token->second.time_token) return DATE_WITH_TIME_ERROR;
- DateTimeFormatToken format_token(DateTimeFormatToken(token->second.type,
- *current_pos - str_begin, GetMaxTokenLength(token->first), *current_pos));
+ int max_len = GetMaxTokenLength(token->first);
+ DateTimeFormatToken format_token(DateTimeFormatToken(
+ token->second.type, *current_pos - str_begin, max_len, *current_pos));
+ dt_ctx_->fmt_out_len += max_len;
+ if (token->second.type == YEAR || token->second.type == ROUND_YEAR
+ || token->second.type == ISO8601_WEEK_NUMBERING_YEAR) {
+ format_token.divisor = std::pow(10, max_len);
+ } else if (token->second.type == FRACTION) {
+ format_token.divisor = std::pow(10, FRACTIONAL_MAX_LEN - max_len);
+ }
if (fm_modifier_active_) {
fm_modifier_active_ = false;
format_token.fm_modifier = true;
diff --git a/be/src/runtime/datetime-iso-sql-format-tokenizer.h b/be/src/runtime/datetime-iso-sql-format-tokenizer.h
index 9d973aa..2b7064f 100644
--- a/be/src/runtime/datetime-iso-sql-format-tokenizer.h
+++ b/be/src/runtime/datetime-iso-sql-format-tokenizer.h
@@ -86,6 +86,9 @@ private:
/// strings.
static const int MAX_FORMAT_LENGTH;
+ /// Maximum length of fractional second digits.
+ static const int FRACTIONAL_MAX_LEN;
+
/// When parsing is in progress this contains the format tokens that we have found in
/// the input format string so far.
std::unordered_set<std::string> used_tokens_;
diff --git a/be/src/runtime/datetime-parser-common.cc b/be/src/runtime/datetime-parser-common.cc
index b931f3d..efb026d 100644
--- a/be/src/runtime/datetime-parser-common.cc
+++ b/be/src/runtime/datetime-parser-common.cc
@@ -408,9 +408,8 @@ int GetWeekOfMonth(int day) {
return (day - 1) / 7 + 1;
}
-int AdjustYearToLength(int year, int len) {
- if (len < 4) {
- int adjust_factor = std::pow(10, len);
+int AdjustYearToLength(int year, int adjust_factor) {
+ if (adjust_factor < 10000) {
return year % adjust_factor;
}
return year;
diff --git a/be/src/runtime/datetime-parser-common.h b/be/src/runtime/datetime-parser-common.h
index 6ae0a92..dddac7f 100644
--- a/be/src/runtime/datetime-parser-common.h
+++ b/be/src/runtime/datetime-parser-common.h
@@ -204,10 +204,17 @@ struct DateTimeFormatToken {
/// content of the token double-escaped.
bool is_double_escaped;
+ /// Helper for fast div/modulo on FRACTION and YEAR tokens.
+ int divisor;
+
DateTimeFormatToken(DateTimeFormatTokenType type, int pos, int len, const char* val)
- : type(type), pos(pos), len(len), val(val), fm_modifier(false),
- is_double_escaped(false) {
- }
+ : type(type),
+ pos(pos),
+ len(len),
+ val(val),
+ fm_modifier(false),
+ is_double_escaped(false),
+ divisor(1000000000) {}
};
/// Holds metadata about the datetime format. In the format parsing process the members of
@@ -226,6 +233,8 @@ struct DateTimeFormatContext {
/// produces output that is longer than the format string.
/// - ISO SQL parsing has token types where the output length is different from the
/// token length like: 'MONTH', 'DAY', 'HH12', 'HH24', FF1, FF2, FF4, etc.
+ /// In those edge cases, 'fmt_out_len' will be set as maximum possible length that might
+ /// be produced from the input format string.
int fmt_out_len;
/// Vector of tokens found in the format string.
std::vector<DateTimeFormatToken> toks;
@@ -368,9 +377,9 @@ int GetWeekOfYear(int year, int month, int day);
/// month starts from the first day of the month.
int GetWeekOfMonth(int day);
-/// Returns the year adjusted to 'len' digits.
-/// E.g. AdjustYearToLength(1789, 3) returns 789.
-int AdjustYearToLength(int year, int len);
+/// Returns the year modulo 'adjust_factor'.
+/// E.g. AdjustYearToLength(1789, 1000) returns 789.
+int AdjustYearToLength(int year, int adjust_factor);
}
}
diff --git a/be/src/runtime/datetime-simple-date-format-parser.cc b/be/src/runtime/datetime-simple-date-format-parser.cc
index 3d6d79c..dac71d8 100644
--- a/be/src/runtime/datetime-simple-date-format-parser.cc
+++ b/be/src/runtime/datetime-simple-date-format-parser.cc
@@ -22,7 +22,6 @@
#include "cctz/civil_time.h"
#include "common/names.h"
#include "runtime/string-value.h"
-#include "runtime/string-value.inline.h"
#include "util/string-parser.h"
using boost::unordered_map;
@@ -37,6 +36,7 @@ bool SimpleDateFormatTokenizer::initialized = false;
const int SimpleDateFormatTokenizer::DEFAULT_DATE_FMT_LEN = 10;
const int SimpleDateFormatTokenizer::DEFAULT_SHORT_DATE_TIME_FMT_LEN = 19;
const int SimpleDateFormatTokenizer::DEFAULT_DATE_TIME_FMT_LEN = 29;
+const int SimpleDateFormatTokenizer::FRACTIONAL_MAX_LEN = 9;
DateTimeFormatContext SimpleDateFormatTokenizer::DEFAULT_SHORT_DATE_TIME_CTX;
DateTimeFormatContext SimpleDateFormatTokenizer::DEFAULT_SHORT_ISO_DATE_TIME_CTX;
@@ -49,7 +49,6 @@ void SimpleDateFormatTokenizer::InitCtx() {
// Setup the default date/time context yyyy-MM-dd HH:mm:ss.SSSSSSSSS
const char* DATE_TIME_CTX_FMT = "yyyy-MM-dd HH:mm:ss.SSSSSSSSS";
- const int FRACTIONAL_MAX_LEN = 9;
for (int i = FRACTIONAL_MAX_LEN; i >= 0; --i) {
DEFAULT_DATE_TIME_CTX[i].Reset(DATE_TIME_CTX_FMT,
DEFAULT_DATE_TIME_FMT_LEN - (FRACTIONAL_MAX_LEN - i));
@@ -174,6 +173,11 @@ bool SimpleDateFormatTokenizer::Tokenize(
if (tok_len == 1) ++dt_ctx->fmt_out_len;
DateTimeFormatToken tok(tok_type, str - str_begin, tok_len, str);
str += tok.len;
+ if (tok_type == YEAR) {
+ tok.divisor = std::pow(10, tok_len);
+ } else if (tok_type == FRACTION) {
+ tok.divisor = std::pow(10, FRACTIONAL_MAX_LEN - tok_len);
+ }
dt_ctx->toks.push_back(tok);
}
if (cast_mode == PARSE) return (dt_ctx->has_date_toks);
diff --git a/be/src/runtime/datetime-simple-date-format-parser.h b/be/src/runtime/datetime-simple-date-format-parser.h
index 5fa5ba7..30b0812 100644
--- a/be/src/runtime/datetime-simple-date-format-parser.h
+++ b/be/src/runtime/datetime-simple-date-format-parser.h
@@ -76,6 +76,7 @@ public:
static const int DEFAULT_DATE_FMT_LEN;
static const int DEFAULT_SHORT_DATE_TIME_FMT_LEN;
static const int DEFAULT_DATE_TIME_FMT_LEN;
+ static const int FRACTIONAL_MAX_LEN;
/// Parse the date/time format into tokens and place them in the context.
/// dt_ctx -- output date/time format context
@@ -110,6 +111,16 @@ public:
static const DateTimeFormatContext* GetDefaultFormatContext(const char* str, int len,
bool accept_time_toks);
+ /// Return default date/time format context for a timestamp parsing.
+ /// If 'time' has a fractional seconds, context with pattern
+ /// "yyyy-MM-dd HH:mm:ss.SSSSSSSSS" will be returned. Otherwise, return context with
+ /// pattern "yyyy-MM-dd HH:mm:ss".
+ static ALWAYS_INLINE const DateTimeFormatContext* GetDefaultTimestampFormatContext(
+ const boost::posix_time::time_duration& time) {
+ return time.fractional_seconds() > 0 ? &DEFAULT_DATE_TIME_CTX[9] :
+ &DEFAULT_SHORT_DATE_TIME_CTX;
+ }
+
/// Initialize the default format contexts. This *must* be called before using
/// GetDefaultFormatContext().
static void InitCtx();
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index 2b1447b..506b69f 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -17,24 +17,19 @@
#include "runtime/timestamp-parse-util.h"
-#include <algorithm>
#include <cctype>
#include <cstdint>
#include <ostream>
#include <vector>
-#include <boost/date_time/date.hpp>
#include <boost/date_time/gregorian/greg_calendar.hpp>
#include <boost/date_time/gregorian/greg_duration.hpp>
-#include <boost/date_time/gregorian_calendar.hpp>
#include <boost/date_time/posix_time/posix_time_config.hpp>
#include <boost/date_time/posix_time/posix_time_duration.hpp>
#include <boost/date_time/posix_time/ptime.hpp>
#include <boost/date_time/special_defs.hpp>
-#include <boost/date_time/time.hpp>
-#include <boost/date_time/time_duration.hpp>
-#include <boost/date_time/time_system_split.hpp>
#include <boost/exception/exception.hpp>
+#include <gutil/strings/numbers.h>
#include "runtime/datetime-iso-sql-format-parser.h"
#include "runtime/datetime-simple-date-format-parser.h"
@@ -42,7 +37,6 @@
#include "runtime/runtime-state.h"
#include "runtime/string-value.inline.h"
#include "udf/udf-internal.h"
-#include "util/string-parser.h"
#include "common/names.h"
@@ -231,19 +225,19 @@ bool TimestampParser::ParseIsoSqlFormat(const char* str, int len,
return true;
}
-string TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date& d,
- const time_duration& t) {
+int TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date& d,
+ const time_duration& t, int max_length, char* dst) {
DCHECK(dt_ctx.toks.size() > 0);
- if (dt_ctx.has_date_toks && d.is_special()) return "";
- if (dt_ctx.has_time_toks && t.is_special()) return "";
- string result;
- result.reserve(dt_ctx.fmt_out_len);
+ if (dt_ctx.has_date_toks && d.is_special()) return -1;
+ if (dt_ctx.has_time_toks && t.is_special()) return -1;
+ int pos = 0;
+ char buff[12];
for (const DateTimeFormatToken& tok: dt_ctx.toks) {
int32_t num_val = -1;
switch (tok.type) {
case YEAR:
case ROUND_YEAR: {
- num_val = AdjustYearToLength(d.year(), tok.len);
+ num_val = AdjustYearToLength(d.year(), tok.divisor);
break;
}
case QUARTER_OF_YEAR: {
@@ -253,7 +247,7 @@ string TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date&
case MONTH_IN_YEAR: num_val = d.month().as_number(); break;
case MONTH_NAME:
case MONTH_NAME_SHORT: {
- result.append(FormatMonthName(d.month().as_number(), tok));
+ AppendToBuffer(FormatMonthName(d.month().as_number(), tok), dst, pos, max_length);
break;
}
case WEEK_OF_YEAR: {
@@ -276,7 +270,7 @@ string TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date&
}
case DAY_NAME:
case DAY_NAME_SHORT: {
- result.append(FormatDayName(d.day_of_week() + 1, tok));
+ AppendToBuffer(FormatDayName(d.day_of_week() + 1, tok), dst, pos, max_length);
break;
}
case HOUR_IN_DAY: num_val = t.hours(); break;
@@ -291,8 +285,8 @@ string TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date&
if (t.hours() >= 12) {
indicator_txt = (tok.len == 2) ? &PM : &PM_LONG;
}
- result.append((isupper(*tok.val)) ? indicator_txt->first : indicator_txt->second,
- tok.len);
+ AppendToBuffer((isupper(*tok.val)) ? indicator_txt->first : indicator_txt->second,
+ tok.len, dst, pos, max_length);
break;
}
case MINUTE_IN_HOUR: num_val = t.minutes(); break;
@@ -303,24 +297,24 @@ string TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date&
}
case FRACTION: {
num_val = t.fractional_seconds();
- if (num_val > 0) for (int j = tok.len; j < 9; ++j) num_val /= 10;
+ if (num_val > 0 && tok.divisor > 1) num_val /= tok.divisor;
break;
}
case SEPARATOR:
case ISO8601_TIME_INDICATOR:
case ISO8601_ZULU_INDICATOR: {
- result.append(tok.val, tok.len);
+ AppendToBuffer(tok.val, tok.len, dst, pos, max_length);
break;
}
case TZ_OFFSET: {
break;
}
case TEXT: {
- result.append(FormatTextToken(tok));
+ AppendToBuffer(FormatTextToken(tok), dst, pos, max_length);
break;
}
case ISO8601_WEEK_NUMBERING_YEAR: {
- num_val = AdjustYearToLength(GetIso8601WeekNumberingYear(d), tok.len);
+ num_val = AdjustYearToLength(GetIso8601WeekNumberingYear(d), tok.divisor);
break;
}
case ISO8601_WEEK_OF_YEAR: {
@@ -337,14 +331,20 @@ string TimestampParser::Format(const DateTimeFormatContext& dt_ctx, const date&
default: DCHECK(false) << "Unknown date/time format token";
}
if (num_val > -1) {
- string tmp_str = std::to_string(num_val);
- if (!tok.fm_modifier && tmp_str.length() < tok.len) {
- tmp_str.insert(0, tok.len - tmp_str.length(), '0');
+ char* buff_end = FastInt32ToBufferLeft(num_val, &buff[0]);
+ int written_length = buff_end - (&buff[0]);
+ DCHECK_GT(written_length, 0);
+ if (!tok.fm_modifier && written_length < tok.len) {
+ for (int i = (tok.len - written_length); (i > 0) && (pos < max_length); i--) {
+ *(dst + pos) = '0';
+ pos++;
+ }
}
- result.append(tmp_str);
+ AppendToBuffer(&buff[0], written_length, dst, pos, max_length);
}
+ DCHECK_LE(pos, max_length) << "Maximum buffer length exceeded!";
}
- return result;
+ return pos;
}
int TimestampParser::GetIso8601WeekNumberingYear(const boost::gregorian::date& d) {
diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h
index 2630ecd..ad61a81 100644
--- a/be/src/runtime/timestamp-parse-util.h
+++ b/be/src/runtime/timestamp-parse-util.h
@@ -70,11 +70,19 @@ class TimestampParser {
boost::posix_time::time_duration* t) WARN_UNUSED_RESULT;
/// Format the date/time values using the given format context.
+ /// The caller must make sure that 'dst' has enough buffer space to hold the output.
+ /// Returns the total output length written into 'dst', or -1 if 'd' or 't' is
+ /// invalid.
/// dt_ctx -- date/time format context
/// d -- the date value
/// t -- the time value
- static std::string Format(const datetime_parse_util::DateTimeFormatContext& dt_ctx,
- const boost::gregorian::date& d, const boost::posix_time::time_duration& t);
+ /// max_length -- the maximum number of characters that 'dst' can hold. Used to bound
+ /// writes and, in debug builds, to DCHECK that the required space does not exceed it.
+ /// This method will not write more than 'max_length' characters.
+ /// dst -- pointer to destination buffer to write the result
+ static int Format(const datetime_parse_util::DateTimeFormatContext& dt_ctx,
+ const boost::gregorian::date& d, const boost::posix_time::time_duration& t,
+ int max_length, char* dst);
private:
/// Helper function finding the correct century for 1 or 2 digit year according to
@@ -109,6 +117,20 @@ class TimestampParser {
/// 'd' date is expected to fall in the [1400, 9999] year range. The returned week
/// numbering year must also fall in the [1400, 9999] range.
static int GetIso8601WeekNumberingYear(const boost::gregorian::date& d);
+
+ /// Helper functions for Format() that append characters to 'dst' at offset 'pos'.
+ static ALWAYS_INLINE void AppendToBuffer(
+ const char* buff, int length_to_copy, char* dst, int& pos, int max_length) {
+ int max_to_copy = std::min(length_to_copy, max_length - pos);
+ std::copy(buff, buff + max_to_copy, dst + pos);
+ pos += length_to_copy;
+ }
+ static ALWAYS_INLINE void AppendToBuffer(
+ const string& str, char* dst, int& pos, int max_length) {
+ int max_to_copy = std::min((int)str.length(), max_length - pos);
+ std::copy(str.cbegin(), str.cbegin() + max_to_copy, dst + pos);
+ pos += str.length();
+ }
};
}
diff --git a/be/src/runtime/timestamp-test.cc b/be/src/runtime/timestamp-test.cc
index bbed2ea..5ef4316 100644
--- a/be/src/runtime/timestamp-test.cc
+++ b/be/src/runtime/timestamp-test.cc
@@ -189,7 +189,8 @@ void TestTimestampTokens(vector<TimestampToken>* toks, int year, int month,
TimestampValue::ParseSimpleDateFormat(val.c_str(), val.length(), dt_ctx);
ValidateTimestamp(tv, fmt, val, fmt_val, year, month, day, hours, mins, secs,
frac);
- string buff = tv.Format(dt_ctx);
+ string buff;
+ tv.Format(dt_ctx, buff);
EXPECT_TRUE(!buff.empty()) << fmt_val;
EXPECT_LE(buff.length(), dt_ctx.fmt_len) << fmt_val;
EXPECT_EQ(buff, val) << fmt_val << " " << buff;
@@ -217,7 +218,8 @@ void TestTimestampTokens(vector<TimestampToken>* toks, int year, int month,
TimestampValue::ParseSimpleDateFormat(val.c_str(), val.length(), dt_ctx);
ValidateTimestamp(tv, fmt, val, fmt_val, year, month, day, hours, mins, secs,
frac);
- string buff = tv.Format(dt_ctx);
+ string buff;
+ tv.Format(dt_ctx, buff);
EXPECT_TRUE(!buff.empty()) << fmt_val;
EXPECT_LE(buff.length(), dt_ctx.fmt_len) << fmt_val;
EXPECT_EQ(buff, val) << fmt_val << " " << buff;
@@ -670,7 +672,8 @@ TEST(TimestampTest, Basic) {
EXPECT_EQ(test_case.expected_fraction, cust_time.fractional_seconds()) << "TC: "
<< i;
if (!test_case.should_format) continue;
- string buff = cust_tv.Format(dt_ctx);
+ string buff;
+ cust_tv.Format(dt_ctx, buff);
EXPECT_TRUE(!buff.empty()) << "TC: " << i;
EXPECT_LE(buff.length(), dt_ctx.fmt_len) << "TC: " << i;
EXPECT_EQ(string(test_case.str, strlen(test_case.str)), buff) << "TC: " << i;
@@ -710,7 +713,8 @@ TEST(TimestampTest, Basic) {
EXPECT_NE(cust_tv.date(), not_a_date) << "TC: " << i;
EXPECT_NE(cust_tv.time(), not_a_date_time) << "TC: " << i;
EXPECT_GE(dt_ctx.fmt_out_len, dt_ctx.fmt_len);
- string buff = cust_tv.Format(dt_ctx);
+ string buff;
+ cust_tv.Format(dt_ctx, buff);
EXPECT_TRUE(!buff.empty()) << "TC: " << i;
EXPECT_LE(buff.length(), dt_ctx.fmt_out_len) << "TC: " << i;
EXPECT_EQ(buff, string(test_case.str, strlen(test_case.str))) << "TC: " << i;
diff --git a/be/src/runtime/timestamp-value.cc b/be/src/runtime/timestamp-value.cc
index 52565cb..ddedebc 100644
--- a/be/src/runtime/timestamp-value.cc
+++ b/be/src/runtime/timestamp-value.cc
@@ -17,12 +17,10 @@
#include "runtime/timestamp-value.h"
-#include <boost/date_time/posix_time/posix_time.hpp>
-
#include "exprs/timestamp-functions.h"
#include "exprs/timezone_db.h"
+#include "runtime/datetime-simple-date-format-parser.h"
#include "runtime/timestamp-parse-util.h"
-#include "runtime/timestamp-value.h"
#include "runtime/timestamp-value.inline.h"
#include "common/names.h"
@@ -44,6 +42,9 @@ const int64_t EPOCH_DAY_NUMBER =
namespace impala {
using datetime_parse_util::DateTimeFormatContext;
+using datetime_parse_util::SimpleDateFormatTokenizer;
+using impala_udf::FunctionContext;
+using impala_udf::StringVal;
const char* TimestampValue::LLVM_CLASS_NAME = "class.impala::TimestampValue";
const double TimestampValue::ONE_BILLIONTH = 0.000000001;
@@ -74,8 +75,15 @@ TimestampValue TimestampValue::ParseIsoSqlFormat(const char* str, int len,
return tv;
}
-string TimestampValue::Format(const DateTimeFormatContext& dt_ctx) const {
- return TimestampParser::Format(dt_ctx, date_, time_);
+void TimestampValue::Format(const DateTimeFormatContext& dt_ctx, string& dst) const {
+ int max_length = dt_ctx.fmt_out_len;
+ dst.resize(max_length);
+ int written = TimestampParser::Format(dt_ctx, date_, time_, max_length, &dst[0]);
+ if (UNLIKELY(written < 0)) {
+ dst.clear();
+ } else {
+ dst.resize(written);
+ }
}
namespace {
@@ -202,16 +210,14 @@ TimestampValue TimestampValue::FromUnixTime(time_t unix_time, const Timezone* lo
}
}
+void TimestampValue::ToString(string& dst) const {
+ Format(*SimpleDateFormatTokenizer::GetDefaultTimestampFormatContext(time_), dst);
+}
+
string TimestampValue::ToString() const {
- stringstream ss;
- if (HasDate()) {
- ss << boost::gregorian::to_iso_extended_string(date_);
- }
- if (HasTime()) {
- if (HasDate()) ss << " ";
- ss << boost::posix_time::to_simple_string(time_);
- }
- return ss.str();
+ string dst;
+ Format(*SimpleDateFormatTokenizer::GetDefaultTimestampFormatContext(time_), dst);
+ return dst;
}
TimestampValue TimestampValue::Add(const boost::posix_time::time_duration& t) const {
diff --git a/be/src/runtime/timestamp-value.h b/be/src/runtime/timestamp-value.h
index 8ad999b..9b630a9 100644
--- a/be/src/runtime/timestamp-value.h
+++ b/be/src/runtime/timestamp-value.h
@@ -184,8 +184,18 @@ class TimestampValue {
bool HasTime() const { return !time_.is_special(); }
bool HasDateAndTime() const { return HasDate() && HasTime(); }
+ /// Writes the string representation of this TimestampValue.
+ /// Callers should prefer the variant with the output argument and reuse the 'dst'
+ /// string as much as possible when calling ToString multiple times (see IMPALA-10984).
+ void ToString(string& dst) const;
std::string ToString() const;
+ /// Return the StringVal representation of TimestampValue.
+ /// Return StringVal::null() if timestamp is invalid.
+ impala_udf::StringVal ToStringVal(impala_udf::FunctionContext* ctx) const;
+ impala_udf::StringVal ToStringVal(impala_udf::FunctionContext* ctx,
+ const datetime_parse_util::DateTimeFormatContext& dt_ctx) const;
+
/// Verifies that the date falls into a valid range (years 1400..9999).
static inline bool IsValidDate(const boost::gregorian::date& date) {
// Smallest valid day number.
@@ -217,9 +227,12 @@ class TimestampValue {
&& time.total_nanoseconds() < NANOS_PER_DAY;
}
- /// Formats the timestamp using the given date/time context and returns the result.
- /// dt_ctx -- the date/time context containing the format to use
- std::string Format(const datetime_parse_util::DateTimeFormatContext& dt_ctx) const;
+ /// Formats the timestamp using the given date/time context and writes the result to the
+ /// destination string. The destination string is cleared if the timestamp is invalid.
+ /// dt_ctx -- the date/time context containing the format to use.
+ /// dst -- destination string where the result should be written into.
+ void Format(
+ const datetime_parse_util::DateTimeFormatContext& dt_ctx, string& dst) const;
/// Interpret 'this' as a timestamp in UTC and convert to unix time.
/// Returns false if the conversion failed ('unix_time' will be undefined), otherwise
diff --git a/be/src/runtime/timestamp-value.inline.h b/be/src/runtime/timestamp-value.inline.h
index 9703d70..d49dbb2 100644
--- a/be/src/runtime/timestamp-value.inline.h
+++ b/be/src/runtime/timestamp-value.inline.h
@@ -26,12 +26,20 @@
#include <chrono>
#include "exprs/timezone_db.h"
-#include "kudu/util/int128.h"
#include "gutil/walltime.h"
+#include "kudu/util/int128.h"
+#include "runtime/datetime-simple-date-format-parser.h"
+#include "runtime/timestamp-parse-util.h"
+#include "udf/udf.h"
#include "util/arithmetic-util.h"
namespace impala {
+using datetime_parse_util::DateTimeFormatContext;
+using datetime_parse_util::SimpleDateFormatTokenizer;
+using impala_udf::FunctionContext;
+using impala_udf::StringVal;
+
template <int32_t TICKS_PER_SEC>
inline TimestampValue TimestampValue::UtcFromUnixTimeTicks(int64_t unix_time_ticks) {
static const boost::gregorian::date EPOCH(1970,1,1);
@@ -206,6 +214,25 @@ inline bool TimestampValue::ToSubsecondUnixTime(const Timezone* local_tz,
return true;
}
+inline StringVal TimestampValue::ToStringVal(
+ FunctionContext* ctx, const DateTimeFormatContext& dt_ctx) const {
+ int max_length = dt_ctx.fmt_out_len;
+ StringVal sv(ctx, max_length);
+ int written = TimestampParser::Format(
+ dt_ctx, date_, time_, max_length, reinterpret_cast<char*>(sv.ptr));
+ if (UNLIKELY(written < 0)) {
+ sv.is_null = true;
+ } else {
+ sv.Resize(ctx, written);
+ }
+ return sv;
+}
+
+inline StringVal TimestampValue::ToStringVal(FunctionContext* ctx) const {
+ const DateTimeFormatContext* dt_ctx =
+ SimpleDateFormatTokenizer::GetDefaultTimestampFormatContext(time_);
+ return ToStringVal(ctx, *dt_ctx);
+}
}
#endif
diff --git a/be/src/util/min-max-filter.cc b/be/src/util/min-max-filter.cc
index aaa2f95..f2a2c56 100644
--- a/be/src/util/min-max-filter.cc
+++ b/be/src/util/min-max-filter.cc
@@ -1163,7 +1163,7 @@ string MinMaxFilter::DebugString(const ColumnValuePB& v, const ColumnType& col_t
} else if (v.has_binary_val()) {
ss << v.binary_val();
} else if (v.has_timestamp_val()) {
- ss << TimestampValue::FromColumnValuePB(v).ToString();
+ ss << TimestampValue::FromColumnValuePB(v);
} else if (v.has_decimal_val()) {
double d = 0.0;
switch (col_type.GetByteSize()) {