You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/09/08 16:50:47 UTC
[1/3] incubator-impala git commit: Bump Kudu version to a71ecfd
Repository: incubator-impala
Updated Branches:
refs/heads/master e993b9712 -> 2fbdc8e37
Bump Kudu version to a71ecfd
Change-Id: Ie23d852f0d630f9484d8ae4f772af6bba13ea24f
Reviewed-on: http://gerrit.cloudera.org:8080/8000
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/72b7e1cc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/72b7e1cc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/72b7e1cc
Branch: refs/heads/master
Commit: 72b7e1cc12a17fab2e8e72eb1db898388b8814fd
Parents: e993b97
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Thu Sep 7 11:51:43 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 7 23:00:16 2017 +0000
----------------------------------------------------------------------
bin/impala-config.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/72b7e1cc/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index f3354f1..c3a25df 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -72,7 +72,7 @@ fi
# moving to a different build of the toolchain, e.g. when a version is bumped or a
# compile option is changed. The build id can be found in the output of the toolchain
# build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=459-0157f69796
+export IMPALA_TOOLCHAIN_BUILD_ID=462-a06b20680a
# Versions of toolchain dependencies.
# -----------------------------------
export IMPALA_AVRO_VERSION=1.7.4-p4
@@ -120,7 +120,7 @@ if [[ $OSTYPE == "darwin"* ]]; then
fi
# Kudu version in the toolchain; provides libkudu_client.so and minicluster binaries.
-export IMPALA_KUDU_VERSION=1c70e5d
+export IMPALA_KUDU_VERSION=a71ecfd
# Kudu version used to identify Java client jar from maven
export KUDU_JAVA_VERSION=1.6.0-cdh5.14.0-SNAPSHOT
[3/3] incubator-impala git commit: IMPALA-5867: Fix bugs parsing
2-digit year
Posted by mj...@apache.org.
IMPALA-5867: Fix bugs parsing 2-digit year
This patch fixes several bugs parsing 1 or 2-digit year formats.
Existing code is broken in several ways:
1. With 1 or 2-digit year format and month/day missing, ParseDateTime()
throws an uncaught exception.
2. If now() is 02/29 in a leap year but the year 80 years earlier is not
a leap year, DateTimeFormatContext::SetCenturyBreak() throws an uncaught
exception.
3. If the parsed date is 02/29 in a leap year but the year 100 years
earlier is not a leap year, TimestampParser::Parse() will consider the
date invalid even though it is valid.
This patch fixes the above bugs and adds a few test cases in
be/src/runtime/timestamp-test.cc
The behavior after this change is:
1. A date without month or day is considered invalid. This is a
pre-existing difference from Hive, which defaults missing month/day
to 01/01.
2. If the century break would fall on an invalid 02/29, it is set to 02/28 of that year (80 years ago).
3. If parsed date is 00/02/29 but 1900/02/29 does not exist, treat
it as 03/01 when comparing to century break.
Change-Id: Ia4f430caea88b6c33f8050a1984ee0ee32ecb0a1
Reviewed-on: http://gerrit.cloudera.org:8080/7910
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2fbdc8e3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2fbdc8e3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2fbdc8e3
Branch: refs/heads/master
Commit: 2fbdc8e37e4cb0a3b3408e90b5a972d778fea7eb
Parents: ac68913
Author: Tianyi Wang <tw...@cloudera.com>
Authored: Wed Aug 30 14:14:52 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Sep 8 03:05:11 2017 +0000
----------------------------------------------------------------------
be/src/runtime/timestamp-parse-util.cc | 95 +++++++++++++++++------------
be/src/runtime/timestamp-parse-util.h | 10 +++
be/src/runtime/timestamp-test.cc | 49 +++++++++++----
3 files changed, 104 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fbdc8e3/be/src/runtime/timestamp-parse-util.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index 9b9e8c8..e64d904 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -29,8 +29,10 @@ namespace assign = boost::assign;
using boost::unordered_map;
using boost::gregorian::date;
using boost::gregorian::date_duration;
+using boost::gregorian::gregorian_calendar;
using boost::posix_time::hours;
using boost::posix_time::not_a_date_time;
+using boost::posix_time::ptime;
using boost::posix_time::time_duration;
namespace impala {
@@ -45,6 +47,8 @@ struct DateTimeParseResult {
int second;
int32_t fraction;
boost::posix_time::time_duration tz_offset;
+ // Whether to realign the year for 2-digit year format
+ bool realign_year;
DateTimeParseResult()
: year(0),
@@ -54,14 +58,22 @@ struct DateTimeParseResult {
minute(0),
second(0),
fraction(0),
- tz_offset(0,0,0,0) {
+ tz_offset(0,0,0,0),
+ realign_year(false) {
}
};
void DateTimeFormatContext::SetCenturyBreak(const TimestampValue &now) {
- const date& now_date = now.date();
- century_break_ptime = boost::posix_time::ptime(
- date(now_date.year() - 80, now_date.month(), now_date.day()), now.time());
+ auto& now_date = now.date();
+ // If the century break is at an invalid 02/29, set it to 02/28 for consistency with
+ // Hive.
+ if (now_date.month() == 2 && now_date.day() == 29 &&
+ !gregorian_calendar::is_leap_year(now_date.year() - 80)) {
+ century_break_ptime = ptime(date(now_date.year() - 80, 2, 28), now.time());
+ } else {
+ century_break_ptime = ptime(
+ date(now_date.year() - 80, now_date.month(), now_date.day()), now.time());
+ }
}
bool TimestampParser::initialized_ = false;
@@ -301,6 +313,32 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
}
}
+date TimestampParser::RealignYear(const DateTimeParseResult& dt_result,
+ const DateTimeFormatContext& dt_ctx, int day_offset, const time_duration& t) {
+ DCHECK(!dt_ctx.century_break_ptime.is_special());
+ // Let the century start at AABB and the year parsed be YY, this gives us AAYY.
+ int year = dt_result.year + (dt_ctx.century_break_ptime.date().year() / 100) * 100;
+ date unshifted_date;
+ // The potential actual date (02/29 in unshifted year + 100 years) might be valid
+ // even if unshifted date is not, so try to make unshifted date valid by adding 1 day.
+ // This makes the behavior closer to Hive.
+ if (dt_result.month == 2 && dt_result.day == 29 &&
+ !gregorian_calendar::is_leap_year(year)) {
+ unshifted_date = date(year, 3, 1);
+ } else {
+ unshifted_date = date(year, dt_result.month, dt_result.day);
+ }
+ unshifted_date += date_duration(day_offset);
+ // Advance 100 years if parsed time is before the century break.
+ // For example if the century breaks at 1937 but the unshifted year is 1936,
+ // the correct year would be 2036.
+ if (ptime(unshifted_date, t) < dt_ctx.century_break_ptime) {
+ return date(year + 100, dt_result.month, dt_result.day) + date_duration(day_offset);
+ } else {
+ return date(year, dt_result.month, dt_result.day) + date_duration(day_offset);
+ }
+}
+
bool TimestampParser::Parse(const char* str, int len, const DateTimeFormatContext& dt_ctx,
date* d, time_duration* t) {
DCHECK(TimestampParser::initialized_);
@@ -330,25 +368,23 @@ bool TimestampParser::Parse(const char* str, int len, const DateTimeFormatContex
*t = time_duration(0, 0, 0, 0);
}
if (dt_ctx.has_date_toks) {
- bool is_valid_date = true;
try {
DCHECK(-1 <= day_offset && day_offset <= 1);
- if ((dt_result.year == 1400 && dt_result.month == 1 && dt_result.day == 1 &&
- day_offset == -1) ||
- (dt_result.year == 9999 && dt_result.month == 12 && dt_result.day == 31 &&
- day_offset == 1)) {
- // Have to check lower/upper bound explicitly.
- // Tried date::is_not_a_date_time() but it doesn't complain value is out of range
- // for "'1400-01-01' - 1 day" and "'9999-12-31' + 1 day".
- is_valid_date = false;
+ if (dt_result.realign_year) {
+ *d = RealignYear(dt_result, dt_ctx, day_offset, *t);
} else {
- *d = date(dt_result.year, dt_result.month, dt_result.day);
- *d += date_duration(day_offset);
+ *d = date(dt_result.year, dt_result.month, dt_result.day)
+ + date_duration(day_offset);
+ }
+ // Have to check year lower/upper bound [1400, 9999] here because
+ // operator + (date, date_duration) won't throw an exception even if the result is
+ // out-of-range.
+ if (d->year() < 1400 || d->year() > 9999) {
+ // Calling year() on out-of-range date throws an exception itself. This branch is
+ // to describe the checking logic but is never taken.
+ DCHECK(false);
}
} catch (boost::exception&) {
- is_valid_date = false;
- }
- if (!is_valid_date) {
VLOG_ROW << "Invalid date: " << dt_result.year << "-" << dt_result.month << "-"
<< dt_result.day;
*d = date();
@@ -428,8 +464,6 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
// Keep track of the number of characters we need to shift token positions by.
// Variable-length tokens will result in values > 0;
int shift_len = 0;
- // Whether to realign the year for 2-digit year format
- bool realign_year = false;
for (const DateTimeFormatToken& tok: dt_ctx.toks) {
const char* tok_val = str + tok.pos + shift_len;
if (tok.type == SEPARATOR) {
@@ -449,10 +483,10 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
case YEAR: {
dt_result->year = StringParser::StringToInt<int>(tok_val, tok_len, &status);
if (UNLIKELY(StringParser::PARSE_SUCCESS != status)) return false;
- if (UNLIKELY(dt_result->year < 1 || dt_result->year > 9999)) return false;
+ if (UNLIKELY(dt_result->year < 0 || dt_result->year > 9999)) return false;
// Year in "Y" and "YY" format should be in the interval
// [current time - 80 years, current time + 20 years)
- if (tok_len <= 2) realign_year = true;
+ if (tok_len <= 2) dt_result->realign_year = true;
break;
}
case MONTH_IN_YEAR: {
@@ -546,23 +580,6 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
default: DCHECK(false) << "Unknown date/time format token";
}
}
- // Hive uses Java's SimpleDateFormat to parse timestamp:
- // In SimpleDateFormat, the century for 2-digit-year breaks at current_time - 80 years.
- // https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html
- if (realign_year) {
- DCHECK(!dt_ctx.century_break_ptime.is_special());
- // Let the century start at AABB and the year parsed be YY, this gives us AAYY.
- dt_result->year += (dt_ctx.century_break_ptime.date().year() / 100) * 100;
- date parsed_date(dt_result->year, dt_result->month, dt_result->day);
- time_duration parsed_time(dt_result->hour, dt_result->minute, dt_result->second,
- dt_result->fraction);
- // Advance 100 years if parsed time is before the century break
- // For example if the century breaks at 1937 but dt_result->year = 1936,
- // the correct year would be 2036.
- if (boost::posix_time::ptime(parsed_date, parsed_time) < dt_ctx.century_break_ptime) {
- dt_result->year += 100;
- }
- }
return true;
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fbdc8e3/be/src/runtime/timestamp-parse-util.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h
index b68cc85..bbcc03f 100644
--- a/be/src/runtime/timestamp-parse-util.h
+++ b/be/src/runtime/timestamp-parse-util.h
@@ -219,6 +219,16 @@ class TimestampParser {
static bool ParseDateTime(const char* str, int str_len,
const DateTimeFormatContext& dt_ctx, DateTimeParseResult* dt_result);
+ /// Helper function finding the correct century for 1 or 2 digit year according to
+ /// century break. Throws bad_year, bad_day_of_month, or bad_month if the date is
+ /// invalid. The century break behavior is copied from Java SimpleDateFormat in order to
+ /// be consistent with Hive.
+ /// In SimpleDateFormat, the century for 2-digit-year breaks at current_time - 80 years.
+ /// https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html
+ static boost::gregorian::date RealignYear(const DateTimeParseResult& dt_result,
+ const DateTimeFormatContext& dt_ctx, int day_offset,
+ const boost::posix_time::time_duration& t);
+
/// Check if the string is a TimeZone offset token.
/// Valid offset token format are 'hh:mm', 'hhmm', 'hh'.
static bool IsValidTZOffset(const char* str_begin, const char* str_end);
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fbdc8e3/be/src/runtime/timestamp-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-test.cc b/be/src/runtime/timestamp-test.cc
index 5f3e0e6..b8919cc 100644
--- a/be/src/runtime/timestamp-test.cc
+++ b/be/src/runtime/timestamp-test.cc
@@ -231,7 +231,8 @@ void TestTimestampTokens(vector<TimestampToken>* toks, int year, int month,
TEST(TimestampTest, Basic) {
// Fix current time to determine the behavior parsing 2-digit year format
- TimestampValue now(date(2017, 7, 28), time_duration(16, 14, 24));
+ // Set it to 03/01 to test 02/29 edge cases.
+ TimestampValue now(date(1980, 3, 1), time_duration(16, 14, 24));
char s1[] = "2012-01-20 01:10:01";
char s2[] = "1990-10-20 10:10:10.123456789 ";
@@ -426,6 +427,19 @@ TEST(TimestampTest, Basic) {
}
// Test parsing/formatting of complex date/time formats
vector<TimestampTC> test_cases = boost::assign::list_of
+ // Test year upper/lower bound
+ (TimestampTC("yyyy-MM-dd HH:mm:ss", "1400-01-01 00:00:00",
+ false, true, false, 1400, 1, 1))
+ (TimestampTC("yyyy-MM-dd HH:mm:ss", "1399-12-31 23:59:59",
+ false, true))
+ (TimestampTC("yyyy-MM-dd HH:mm:ss", "9999-12-31 23:59:59",
+ false, true, false, 9999, 12, 31, 23, 59, 59))
+ (TimestampTC("yyyy-MM-dd HH:mm:ss +hh", "1400-01-01 01:00:00 +01", false, true, false,
+ 1400, 1, 1, 0, 0, 0))
+ (TimestampTC("yyyy-MM-dd HH:mm:ss +hh", "1400-01-01 01:00:00 +02", false, true))
+ (TimestampTC("yyyy-MM-dd HH:mm:ss +hh", "9999-12-31 22:00:00 -01", false, true, false,
+ 9999, 12, 31, 23, 0, 0))
+ (TimestampTC("yyyy-MM-dd HH:mm:ss +hh", "9999-12-31 22:00:00 -02", false, true))
// Test case on literal short months
(TimestampTC("yyyy-MMM-dd", "2013-OCT-01", false, true, false, 2013, 10, 1))
// Test case on literal short months
@@ -470,19 +484,32 @@ TEST(TimestampTest, Basic) {
(TimestampTC("MMdd", "1201", false, true))
// Test missing month
(TimestampTC("yyyydd", "201301", false, true))
- // Test missing month
- (TimestampTC("yyyymm", "201301", false, true))
+ (TimestampTC("yydd", "1301", false, true))
+ // Test missing day
+ (TimestampTC("yyyyMM", "201301", false, true))
+ (TimestampTC("yyMM", "8512", false, true))
+ // Test missing month and day
+ (TimestampTC("yyyy", "2013", false, true))
+ (TimestampTC("yy", "13", false, true))
// Test short year token
(TimestampTC("y-MM-dd", "2013-11-13", false, true, false, 2013, 11, 13))
- (TimestampTC("y-MM-dd", "13-11-13", false, true, false, 2013, 11, 13))
+ (TimestampTC("y-MM-dd", "13-11-13", false, true, false, 1913, 11, 13))
// Test 2-digit year format
- (TimestampTC("yy-MM-dd", "37-07-28", false, true, false, 2037, 7, 28))
- (TimestampTC("yy-MM-dd", "37-07-29", false, true, false, 1937, 7, 29))
+ (TimestampTC("yy-MM-dd", "17-08-31", false, true, false, 1917, 8, 31))
+ (TimestampTC("yy-MM-dd", "99-08-31", false, true, false, 1999, 8, 31))
+ // Test 02/29 edge cases of 2-digit year format
+ (TimestampTC("yy-MM-dd", "00-02-28", false, true, false, 2000, 2, 28))
+ (TimestampTC("yy-MM-dd", "00-02-29", false, true, false, 2000, 2, 29))
+ (TimestampTC("yy-MM-dd", "00-03-01", false, true, false, 2000, 3, 1))
+ (TimestampTC("yy-MM-dd", "00-03-02", false, true, false, 1900, 3, 2))
+ (TimestampTC("yy-MM-dd", "04-02-29", false, true, false, 1904, 2, 29))
+ (TimestampTC("yy-MM-dd", "99-02-29", false, true))
// Test 1-digit year format with time to show the exact boundary
- (TimestampTC("y-MM-dd HH:mm:ss", "37-07-28 16:14:23", false, true, false,
- 2037, 7, 28, 16, 14, 23))
- (TimestampTC("y-MM-dd HH:mm:ss", "37-07-28 16:14:24", false, true, false,
- 1937, 7, 28, 16, 14, 24))
+ // Before the cutoff. Year should be 2000
+ (TimestampTC("y-MM-dd HH:mm:ss", "00-02-29 16:14:23", false, true, false,
+ 2000, 2, 29, 16, 14, 23))
+ // After the cutoff but 02/29/1900 is invalid
+ (TimestampTC("y-MM-dd HH:mm:ss", "00-02-29 16:14:24", false, true))
// Test short month token
(TimestampTC("yyyy-M-dd", "2013-11-13", false, true, false, 2013, 11, 13))
(TimestampTC("yyyy-M-dd", "2013-1-13", false, true, false, 2013, 1, 13))
@@ -491,7 +518,7 @@ TEST(TimestampTest, Basic) {
(TimestampTC("yyyy-MM-d", "2013-11-3", false, true, false, 2013, 11, 3))
// Test short all date tokens
(TimestampTC("y-M-d", "2013-11-13", false, true, false, 2013, 11, 13))
- (TimestampTC("y-M-d", "13-1-3", false, true, false, 2013, 1, 3))
+ (TimestampTC("y-M-d", "13-1-3", false, true, false, 1913, 1, 3))
// Test short hour token
(TimestampTC("H:mm:ss", "14:24:34", false, false, true, 0, 0, 0, 14, 24, 34))
(TimestampTC("H:mm:ss", "4:24:34", false, false, true, 0, 0, 0, 4, 24, 34))
[2/3] incubator-impala git commit: IMPALA-2107: [DOCS] Document
base64*code() functions
Posted by mj...@apache.org.
IMPALA-2107: [DOCS] Document base64*code() functions
base64decode()
base64encode()
Change-Id: I5251e368ad36756c19a7b97e5ef6f232f616189b
Reviewed-on: http://gerrit.cloudera.org:8080/7963
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ac689131
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ac689131
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ac689131
Branch: refs/heads/master
Commit: ac689131190f5bf01a7c0a4892c30647139e7d32
Parents: 72b7e1c
Author: John Russell <jr...@cloudera.com>
Authored: Tue Aug 29 17:15:50 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 7 23:41:25 2017 +0000
----------------------------------------------------------------------
docs/impala_keydefs.ditamap | 3 +
docs/shared/impala_common.xml | 117 +++++++++++++++++++++++++++
docs/topics/impala_string_functions.xml | 53 ++++++++++++
3 files changed, 173 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index cdcaed6..518afef 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -143,6 +143,9 @@ under the License.
<keydef href="http://www.lzop.org/" scope="external" format="html" keys="lzop.org"/>
<!-- Links to Wikipedia pages for background on industry terminology. -->
+ <keydef href="https://en.wikipedia.org/wiki/Base64" scope="external" format="html" keys="base64">
+ <topicmeta><linktext>Base64 article on Wikipedia</linktext></topicmeta>
+ </keydef>
<keydef href="http://en.wikipedia.org/wiki/.htpasswd" scope="external" format="html" keys=".htpasswd"/>
<keydef href="http://en.wikipedia.org/wiki/Coordinated_Universal_Time" scope="external" format="html" keys="Coordinated_Universal_Time"/>
<keydef href="http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function" scope="external" format="html" keys="wiki_fnv"/>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index 5d79acc..9d6f72b 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -773,6 +773,123 @@ select concat('abc','mno','xyz');</codeblock>
HBase tables.
</p>
+ <p id="base64_charset">
+ The characters that can be generated as output
+ from <codeph>base64encode()</codeph>, or specified in
+ the argument string to <codeph>base64decode()</codeph>,
+ are the ASCII uppercase and lowercase letters (A-Z, a-z),
+ digits (0-9), and the punctuation characters
+ <codeph>+</codeph>, <codeph>/</codeph>, and <codeph>=</codeph>.
+ </p>
+
+ <p id="base64_error_handling">
+ If the argument string to <codeph>base64decode()</codeph> does
+ not represent a valid base64-encoded value, subject to the
+ constraints of the Impala implementation such as the allowed
+ character set, the function returns <codeph>NULL</codeph>.
+ </p>
+
+ <p id="base64_use_cases">
+ The functions <codeph>base64encode()</codeph> and
+ <codeph>base64decode()</codeph> are typically used
+ in combination, to store in an Impala table string data that is
+ problematic to store or transmit. For example, you could use
+ these functions to store string data that uses an encoding
+ other than UTF-8, or to transform the values in contexts that
+ require ASCII values, such as for partition key columns.
+ Keep in mind that base64-encoded values produce different results
+ for string functions such as <codeph>LENGTH()</codeph>,
+ <codeph>MAX()</codeph>, and <codeph>MIN()</codeph> than when
+ those functions are called with the unencoded string values.
+ </p>
+
+ <p id="base64_alignment">
+ All return values produced by <codeph>base64encode()</codeph>
+ are a multiple of 4 bytes in length. All argument values
+ supplied to <codeph>base64decode()</codeph> must also be a
+ multiple of 4 bytes in length. If a base64-encoded value
+ would otherwise have a different length, it can be padded
+ with trailing <codeph>=</codeph> characters to reach a length
+ that is a multiple of 4 bytes.
+ </p>
+
+ <p id="base64_examples">
+ The following examples show how to use <codeph>base64encode()</codeph>
+ and <codeph>base64decode()</codeph> together to store and retrieve
+ string values:
+<codeblock>
+-- An arbitrary string can be encoded in base 64.
+-- The length of the output is a multiple of 4 bytes,
+-- padded with trailing = characters if necessary.
+select base64encode('hello world') as encoded,
+ length(base64encode('hello world')) as length;
++------------------+--------+
+| encoded | length |
++------------------+--------+
+| aGVsbG8gd29ybGQ= | 16 |
++------------------+--------+
+
+-- Passing an encoded value to base64decode() produces
+-- the original value.
+select base64decode('aGVsbG8gd29ybGQ=') as decoded;
++-------------+
+| decoded |
++-------------+
+| hello world |
++-------------+
+</codeblock>
+
+ These examples demonstrate incorrect encoded values that
+ produce <codeph>NULL</codeph> return values when decoded:
+
+<codeblock>
+-- The input value to base64decode() must be a multiple of 4 bytes.
+-- In this case, leaving off the trailing = padding character
+-- produces a NULL return value.
+select base64decode('aGVsbG8gd29ybGQ') as decoded;
++---------+
+| decoded |
++---------+
+| NULL |
++---------+
+WARNINGS: UDF WARNING: Invalid base64 string; input length is 15,
+ which is not a multiple of 4.
+
+-- The input to base64decode() can only contain certain characters.
+-- The $ character in this case causes a NULL return value.
+select base64decode('abc$');
++----------------------+
+| base64decode('abc$') |
++----------------------+
+| NULL |
++----------------------+
+WARNINGS: UDF WARNING: Could not base64 decode input in space 4; actual output length 0
+</codeblock>
+
+ These examples demonstrate <q>round-tripping</q> of an original string to an
+ encoded string, and back again. This technique is applicable if the original
+ source is in an unknown encoding, or if some intermediate processing stage
+ might cause national characters to be misrepresented:
+
+<codeblock>
+select 'circumflex accents: â, ê, î, ô, û' as original,
+ base64encode('circumflex accents: â, ê, î, ô, û') as encoded;
++-----------------------------------+------------------------------------------------------+
+| original | encoded |
++-----------------------------------+------------------------------------------------------+
+| circumflex accents: â, ê, î, ô, û | Y2lyY3VtZmxleCBhY2NlbnRzOiDDoiwgw6osIMOuLCDDtCwgw7s= |
++-----------------------------------+------------------------------------------------------+
+
+select base64encode('circumflex accents: â, ê, î, ô, û') as encoded,
+ base64decode(base64encode('circumflex accents: â, ê, î, ô, û')) as decoded;
++------------------------------------------------------+-----------------------------------+
+| encoded | decoded |
++------------------------------------------------------+-----------------------------------+
+| Y2lyY3VtZmxleCBhY2NlbnRzOiDDoiwgw6osIMOuLCDDtCwgw7s= | circumflex accents: â, ê, î, ô, û |
++------------------------------------------------------+-----------------------------------+
+</codeblock>
+ </p>
+
<codeblock id="parquet_fallback_schema_resolution_example"><![CDATA[
create database schema_evolution;
use schema_evolution;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/topics/impala_string_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string_functions.xml b/docs/topics/impala_string_functions.xml
index 5758c52..36024f7 100644
--- a/docs/topics/impala_string_functions.xml
+++ b/docs/topics/impala_string_functions.xml
@@ -85,6 +85,59 @@ under the License.
</dlentry>
+ <dlentry id="base64decode" rev="2.6.0 IMPALA-2107">
+
+ <dt>
+ <codeph>base64decode(string str)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="hidden">base64decode() function</indexterm>
+ <b>Purpose:</b> Decodes a base64-encoded string, returning the original string value.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ For general information about Base64 encoding, see
+ <xref keyref="base64"/>.
+ </p>
+ <p conref="../shared/impala_common.xml#common/base64_use_cases"/>
+ <p conref="../shared/impala_common.xml#common/base64_charset"/>
+ <p conref="../shared/impala_common.xml#common/base64_alignment"/>
+ <p conref="../shared/impala_common.xml#common/base64_error_handling"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p conref="../shared/impala_common.xml#common/base64_examples"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="base64encode" rev="2.6.0 IMPALA-2107">
+
+ <dt>
+ <codeph>base64encode(string str)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="hidden">base64encode() function</indexterm>
+ <b>Purpose:</b> Encodes a string in base64 format, returning the encoded string value.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ For general information about Base64 encoding, see
+ <xref keyref="base64"/>.
+ </p>
+ <p conref="../shared/impala_common.xml#common/base64_use_cases"/>
+ <p conref="../shared/impala_common.xml#common/base64_charset"/>
+ <p conref="../shared/impala_common.xml#common/base64_alignment"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p conref="../shared/impala_common.xml#common/base64_examples"/>
+ </dd>
+
+ </dlentry>
+
<dlentry rev="2.3.0" id="btrim">
<dt>