You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/08/03 04:21:08 UTC
[1/2] incubator-impala git commit: Bump Kudu version to 1c0276a
Repository: incubator-impala
Updated Branches:
refs/heads/master 3f82d1570 -> 5797f8596
Bump Kudu version to 1c0276a
Change-Id: I7e4bf59595dd483ac12f4f9c4b8a7da8c5c43e91
Reviewed-on: http://gerrit.cloudera.org:8080/7565
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/748fe94a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/748fe94a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/748fe94a
Branch: refs/heads/master
Commit: 748fe94a916d7dfc5781b3711c8484f8ae508cab
Parents: 3f82d15
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Wed Aug 2 09:08:40 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Aug 2 21:07:40 2017 +0000
----------------------------------------------------------------------
bin/impala-config.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/748fe94a/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 2699339..634430b 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -72,7 +72,7 @@ fi
# moving to a different build of the toolchain, e.g. when a version is bumped or a
# compile option is changed. The build id can be found in the output of the toolchain
# build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=438-a80bf530fe
+export IMPALA_TOOLCHAIN_BUILD_ID=443-4c9c3b7328
# Versions of toolchain dependencies.
# -----------------------------------
export IMPALA_AVRO_VERSION=1.7.4-p4
@@ -120,7 +120,7 @@ if [[ $OSTYPE == "darwin"* ]]; then
fi
# Kudu version in the toolchain; provides libkudu_client.so and minicluster binaries.
-export IMPALA_KUDU_VERSION=27854fd
+export IMPALA_KUDU_VERSION=1c0276a
# Kudu version used to identify Java client jar from maven
export KUDU_JAVA_VERSION=1.5.0-cdh5.13.0-SNAPSHOT
[2/2] incubator-impala git commit: IMPALA-3894: Change the behavior
parsing date "YY"
Posted by ta...@apache.org.
IMPALA-3894: Change the behavior parsing date "YY"
This patch change the behavor when running unix_timestamp(string, string)
function. Before the change Impala directly adds 2000 to the year parsed.
Behavior after change is the same as Hive's, shifting the parsed date
into the interval [current time - 80 years, current time + 20 years).
In 2017, given query
> select from_unixtime(unix_timestamp('31-AUG-94', 'dd-MMM-yy'),'yyyyMMdd');
Impala would output 20940831 before the change and 19940831 with this
patch applied. unix_timestamp function with other forms of parameters
is not affected.
Hive's impelentation can be found at:
https://github.com/apache/hive/blob/39d66a439c02ea2b5c7501b362c0d8f9b8d22cc0/hplsql/src/main/java/org/apache/hive/hplsql/functions/FunctionDatetime.java#L159
It uses SimpleDateFormat, documented at:
https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html
Test: add 4 test cases to timestamp-test.cc, testing the edge cases of
2-year format.
Change-Id: I5da761255915dc741f1dcc488fd4ef6ecc385896
Reviewed-on: http://gerrit.cloudera.org:8080/7530
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5797f859
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5797f859
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5797f859
Branch: refs/heads/master
Commit: 5797f859692ced6f30a790568b29669c663fa65c
Parents: 748fe94
Author: Tianyi Wang <tw...@cloudera.com>
Authored: Thu Jul 27 16:51:23 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Aug 3 02:20:48 2017 +0000
----------------------------------------------------------------------
be/src/exprs/timestamp-functions.cc | 1 +
be/src/runtime/timestamp-parse-util.cc | 31 +++++++++++++++++++++++++++--
be/src/runtime/timestamp-parse-util.h | 11 ++++++++++
be/src/runtime/timestamp-test.cc | 12 +++++++++++
4 files changed, 53 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5797f859/be/src/exprs/timestamp-functions.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc
index 93a16cd..9cb5430 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -184,6 +184,7 @@ void TimestampFunctions::UnixAndFromUnixPrepare(
// This is much cheaper vs alloc/dealloc'ing a context for each evaluation.
dt_ctx = new DateTimeFormatContext();
}
+ dt_ctx->SetCenturyBreak(*context->impl()->state()->now());
context->SetFunctionState(scope, dt_ctx);
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5797f859/be/src/runtime/timestamp-parse-util.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index d0c595f..9b9e8c8 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -19,10 +19,10 @@
#include <boost/assign/list_of.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
-#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/unordered_map.hpp>
#include "runtime/string-value.inline.h"
+#include "runtime/timestamp-value.h"
#include "util/string-parser.h"
namespace assign = boost::assign;
@@ -58,6 +58,12 @@ struct DateTimeParseResult {
}
};
+void DateTimeFormatContext::SetCenturyBreak(const TimestampValue &now) {
+ const date& now_date = now.date();
+ century_break_ptime = boost::posix_time::ptime(
+ date(now_date.year() - 80, now_date.month(), now_date.day()), now.time());
+}
+
bool TimestampParser::initialized_ = false;
/// Lazily initialized pseudo-constant hashmap for mapping month names to an index.
@@ -422,6 +428,8 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
// Keep track of the number of characters we need to shift token positions by.
// Variable-length tokens will result in values > 0;
int shift_len = 0;
+ // Whether to realign the year for 2-digit year format
+ bool realign_year = false;
for (const DateTimeFormatToken& tok: dt_ctx.toks) {
const char* tok_val = str + tok.pos + shift_len;
if (tok.type == SEPARATOR) {
@@ -442,7 +450,9 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
dt_result->year = StringParser::StringToInt<int>(tok_val, tok_len, &status);
if (UNLIKELY(StringParser::PARSE_SUCCESS != status)) return false;
if (UNLIKELY(dt_result->year < 1 || dt_result->year > 9999)) return false;
- if (tok_len < 4 && dt_result->year < 99) dt_result->year += 2000;
+ // Year in "Y" and "YY" format should be in the interval
+ // [current time - 80 years, current time + 20 years)
+ if (tok_len <= 2) realign_year = true;
break;
}
case MONTH_IN_YEAR: {
@@ -536,6 +546,23 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
default: DCHECK(false) << "Unknown date/time format token";
}
}
+ // Hive uses Java's SimpleDateFormat to parse timestamp:
+ // In SimpleDateFormat, the century for 2-digit-year breaks at current_time - 80 years.
+ // https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html
+ if (realign_year) {
+ DCHECK(!dt_ctx.century_break_ptime.is_special());
+ // Let the century start at AABB and the year parsed be YY, this gives us AAYY.
+ dt_result->year += (dt_ctx.century_break_ptime.date().year() / 100) * 100;
+ date parsed_date(dt_result->year, dt_result->month, dt_result->day);
+ time_duration parsed_time(dt_result->hour, dt_result->minute, dt_result->second,
+ dt_result->fraction);
+ // Advance 100 years if parsed time is before the century break
+ // For example if the century breaks at 1937 but dt_result->year = 1936,
+ // the correct year would be 2036.
+ if (boost::posix_time::ptime(parsed_date, parsed_time) < dt_ctx.century_break_ptime) {
+ dt_result->year += 100;
+ }
+ }
return true;
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5797f859/be/src/runtime/timestamp-parse-util.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h
index 8611f87..b68cc85 100644
--- a/be/src/runtime/timestamp-parse-util.h
+++ b/be/src/runtime/timestamp-parse-util.h
@@ -20,6 +20,7 @@
#include <cstddef>
#include <vector>
+#include <boost/date_time/posix_time/ptime.hpp>
namespace boost {
namespace gregorian {
@@ -33,6 +34,7 @@ namespace boost {
namespace impala {
struct DateTimeParseResult;
+class TimestampValue;
/// Add support for dealing with custom date/time formats in Impala. The following
/// date/time tokens are supported:
@@ -134,6 +136,9 @@ struct DateTimeFormatContext {
std::vector<DateTimeFormatToken> toks;
bool has_date_toks;
bool has_time_toks;
+ /// Current time - 80 years to determine the actual year when
+ /// parsing 1 or 2-digit year token.
+ boost::posix_time::ptime century_break_ptime;
DateTimeFormatContext() {
Reset(NULL, 0);
@@ -143,6 +148,11 @@ struct DateTimeFormatContext {
Reset(fmt, fmt_len);
}
+ /// Set the century break when parsing 1 or 2-digit year format.
+ /// When parsing 1 or 2-digit year, the year should be in the interval
+ /// [now - 80 years, now + 20 years), according to Hive.
+ void SetCenturyBreak(const TimestampValue &now);
+
void Reset(const char* fmt, int fmt_len) {
this->fmt = fmt;
this->fmt_len = fmt_len;
@@ -150,6 +160,7 @@ struct DateTimeFormatContext {
this->has_date_toks = false;
this->has_time_toks = false;
this->toks.clear();
+ this->century_break_ptime = boost::posix_time::not_a_date_time;
}
};
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5797f859/be/src/runtime/timestamp-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-test.cc b/be/src/runtime/timestamp-test.cc
index c18313b..5f3e0e6 100644
--- a/be/src/runtime/timestamp-test.cc
+++ b/be/src/runtime/timestamp-test.cc
@@ -230,6 +230,9 @@ void TestTimestampTokens(vector<TimestampToken>* toks, int year, int month,
}
TEST(TimestampTest, Basic) {
+ // Fix current time to determine the behavior parsing 2-digit year format
+ TimestampValue now(date(2017, 7, 28), time_duration(16, 14, 24));
+
char s1[] = "2012-01-20 01:10:01";
char s2[] = "1990-10-20 10:10:10.123456789 ";
char s3[] = " 1990-10-20 10:10:10.123456789";
@@ -472,6 +475,14 @@ TEST(TimestampTest, Basic) {
// Test short year token
(TimestampTC("y-MM-dd", "2013-11-13", false, true, false, 2013, 11, 13))
(TimestampTC("y-MM-dd", "13-11-13", false, true, false, 2013, 11, 13))
+ // Test 2-digit year format
+ (TimestampTC("yy-MM-dd", "37-07-28", false, true, false, 2037, 7, 28))
+ (TimestampTC("yy-MM-dd", "37-07-29", false, true, false, 1937, 7, 29))
+ // Test 1-digit year format with time to show the exact boundary
+ (TimestampTC("y-MM-dd HH:mm:ss", "37-07-28 16:14:23", false, true, false,
+ 2037, 7, 28, 16, 14, 23))
+ (TimestampTC("y-MM-dd HH:mm:ss", "37-07-28 16:14:24", false, true, false,
+ 1937, 7, 28, 16, 14, 24))
// Test short month token
(TimestampTC("yyyy-M-dd", "2013-11-13", false, true, false, 2013, 11, 13))
(TimestampTC("yyyy-M-dd", "2013-1-13", false, true, false, 2013, 1, 13))
@@ -501,6 +512,7 @@ TEST(TimestampTest, Basic) {
for (int i = 0; i < test_cases.size(); ++i) {
TimestampTC test_case = test_cases[i];
DateTimeFormatContext dt_ctx(test_case.fmt, strlen(test_case.fmt));
+ dt_ctx.SetCenturyBreak(now);
bool parse_result = TimestampParser::ParseFormatTokens(&dt_ctx);
if (test_case.fmt_should_fail) {
EXPECT_FALSE(parse_result) << "TC: " << i;