You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/09/08 16:50:47 UTC

[1/3] incubator-impala git commit: Bump Kudu version to a71ecfd

Repository: incubator-impala
Updated Branches:
  refs/heads/master e993b9712 -> 2fbdc8e37


Bump Kudu version to a71ecfd

Change-Id: Ie23d852f0d630f9484d8ae4f772af6bba13ea24f
Reviewed-on: http://gerrit.cloudera.org:8080/8000
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/72b7e1cc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/72b7e1cc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/72b7e1cc

Branch: refs/heads/master
Commit: 72b7e1cc12a17fab2e8e72eb1db898388b8814fd
Parents: e993b97
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Thu Sep 7 11:51:43 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 7 23:00:16 2017 +0000

----------------------------------------------------------------------
 bin/impala-config.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/72b7e1cc/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index f3354f1..c3a25df 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -72,7 +72,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=459-0157f69796
+export IMPALA_TOOLCHAIN_BUILD_ID=462-a06b20680a
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p4
@@ -120,7 +120,7 @@ if [[ $OSTYPE == "darwin"* ]]; then
 fi
 
 # Kudu version in the toolchain; provides libkudu_client.so and minicluster binaries.
-export IMPALA_KUDU_VERSION=1c70e5d
+export IMPALA_KUDU_VERSION=a71ecfd
 
 # Kudu version used to identify Java client jar from maven
 export KUDU_JAVA_VERSION=1.6.0-cdh5.14.0-SNAPSHOT


[3/3] incubator-impala git commit: IMPALA-5867: Fix bugs parsing 2-digit year

Posted by mj...@apache.org.
IMPALA-5867: Fix bugs parsing 2-digit year

This patch fixes several bugs parsing 1 or 2-digit year formats.
Existing code is broken in several ways:
1. With 1 or 2-digit year format and month/day missing, ParseDateTime()
   throws an uncaught exception.
2. If now() is 02/29 in a leap year but (now() - 80 years) isn't,
   DateTimeFormatContext::SetCenturyBreak() throws an uncaught
   exception.
3. If the parsed date is 02/29 in a leap year but the year 100 years
   earlier isn't a leap year, TimestampParser::Parse() considers the
   date invalid even though it is valid.
This patch fixes the above bugs and adds a few test cases in
be/src/runtime/timestamp-test.cc
The behavior after the change is:
1. A date without month or day is considered invalid. This is a
   pre-existing difference from Hive, which defaults missing month/day
   to 01/01.
2. If the century break would fall on an invalid 02/29, it is set to
   02/28 of the year 80 years ago.
3. If the parsed date is 00/02/29 but 1900/02/29 does not exist, treat
   it as 03/01 when comparing it to the century break.

Change-Id: Ia4f430caea88b6c33f8050a1984ee0ee32ecb0a1
Reviewed-on: http://gerrit.cloudera.org:8080/7910
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2fbdc8e3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2fbdc8e3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2fbdc8e3

Branch: refs/heads/master
Commit: 2fbdc8e37e4cb0a3b3408e90b5a972d778fea7eb
Parents: ac68913
Author: Tianyi Wang <tw...@cloudera.com>
Authored: Wed Aug 30 14:14:52 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Sep 8 03:05:11 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/timestamp-parse-util.cc | 95 +++++++++++++++++------------
 be/src/runtime/timestamp-parse-util.h  | 10 +++
 be/src/runtime/timestamp-test.cc       | 49 +++++++++++----
 3 files changed, 104 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fbdc8e3/be/src/runtime/timestamp-parse-util.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc
index 9b9e8c8..e64d904 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -29,8 +29,10 @@ namespace assign = boost::assign;
 using boost::unordered_map;
 using boost::gregorian::date;
 using boost::gregorian::date_duration;
+using boost::gregorian::gregorian_calendar;
 using boost::posix_time::hours;
 using boost::posix_time::not_a_date_time;
+using boost::posix_time::ptime;
 using boost::posix_time::time_duration;
 
 namespace impala {
@@ -45,6 +47,8 @@ struct DateTimeParseResult {
   int second;
   int32_t fraction;
   boost::posix_time::time_duration tz_offset;
+  // Whether to realign the year for 2-digit year format
+  bool realign_year;
 
   DateTimeParseResult()
     : year(0),
@@ -54,14 +58,22 @@ struct DateTimeParseResult {
       minute(0),
       second(0),
       fraction(0),
-      tz_offset(0,0,0,0) {
+      tz_offset(0,0,0,0),
+      realign_year(false) {
   }
 };
 
 void DateTimeFormatContext::SetCenturyBreak(const TimestampValue &now) {
-  const date& now_date = now.date();
-  century_break_ptime = boost::posix_time::ptime(
-      date(now_date.year() - 80, now_date.month(), now_date.day()), now.time());
+  auto& now_date = now.date();
+  // If the century break is at an invalid 02/29, set it to 02/28 for consistency with
+  // Hive.
+  if (now_date.month() == 2 && now_date.day() == 29 &&
+      !gregorian_calendar::is_leap_year(now_date.year() - 80)) {
+    century_break_ptime = ptime(date(now_date.year() - 80, 2, 28), now.time());
+  } else {
+    century_break_ptime = ptime(
+        date(now_date.year() - 80, now_date.month(), now_date.day()), now.time());
+  }
 }
 
 bool TimestampParser::initialized_ = false;
@@ -301,6 +313,32 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d,
   }
 }
 
+date TimestampParser::RealignYear(const DateTimeParseResult& dt_result,
+    const DateTimeFormatContext& dt_ctx, int day_offset, const time_duration& t) {
+  DCHECK(!dt_ctx.century_break_ptime.is_special());
+  // Let the century start at AABB and the year parsed be YY, this gives us AAYY.
+  int year = dt_result.year + (dt_ctx.century_break_ptime.date().year() / 100) * 100;
+  date unshifted_date;
+  // The potential actual date (02/29 in unshifted year + 100 years) might be valid
+  // even if unshifted date is not, so try to make unshifted date valid by adding 1 day.
+  // This makes the behavior closer to Hive.
+  if (dt_result.month == 2 && dt_result.day == 29 &&
+      !gregorian_calendar::is_leap_year(year)) {
+    unshifted_date = date(year, 3, 1);
+  } else {
+    unshifted_date = date(year, dt_result.month, dt_result.day);
+  }
+  unshifted_date += date_duration(day_offset);
+  // Advance 100 years if parsed time is before the century break.
+  // For example if the century breaks at 1937 but dt_result->year = 1936,
+  // the correct year would be 2036.
+  if (ptime(unshifted_date, t) < dt_ctx.century_break_ptime) {
+    return date(year + 100, dt_result.month, dt_result.day) + date_duration(day_offset);
+  } else {
+    return date(year, dt_result.month, dt_result.day) + date_duration(day_offset);
+  }
+}
+
 bool TimestampParser::Parse(const char* str, int len, const DateTimeFormatContext& dt_ctx,
     date* d, time_duration* t) {
   DCHECK(TimestampParser::initialized_);
@@ -330,25 +368,23 @@ bool TimestampParser::Parse(const char* str, int len, const DateTimeFormatContex
     *t = time_duration(0, 0, 0, 0);
   }
   if (dt_ctx.has_date_toks) {
-    bool is_valid_date = true;
     try {
       DCHECK(-1 <= day_offset && day_offset <= 1);
-      if ((dt_result.year == 1400 && dt_result.month == 1 && dt_result.day == 1 &&
-           day_offset == -1) ||
-          (dt_result.year == 9999 && dt_result.month == 12 && dt_result.day == 31 &&
-           day_offset == 1)) {
-        // Have to check lower/upper bound explicitly.
-        // Tried date::is_not_a_date_time() but it doesn't complain value is out of range
-        // for "'1400-01-01' - 1 day" and "'9999-12-31' + 1 day".
-        is_valid_date = false;
+      if (dt_result.realign_year) {
+        *d = RealignYear(dt_result, dt_ctx, day_offset, *t);
       } else {
-        *d = date(dt_result.year, dt_result.month, dt_result.day);
-        *d += date_duration(day_offset);
+        *d = date(dt_result.year, dt_result.month, dt_result.day)
+             + date_duration(day_offset);
+      }
+      // Have to check year lower/upper bound [1400, 9999] here because
+      // operator + (date, date_duration) won't throw an exception even if the result is
+      // out-of-range.
+      if (d->year() < 1400 || d->year() > 9999) {
+        // Calling year() on out-of-range date throws an exception itself. This branch is
+        // to describe the checking logic but is never taken.
+        DCHECK(false);
       }
     } catch (boost::exception&) {
-      is_valid_date = false;
-    }
-    if (!is_valid_date) {
       VLOG_ROW << "Invalid date: " << dt_result.year << "-" << dt_result.month << "-"
                << dt_result.day;
       *d = date();
@@ -428,8 +464,6 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
   // Keep track of the number of characters we need to shift token positions by.
   // Variable-length tokens will result in values > 0;
   int shift_len = 0;
-  // Whether to realign the year for 2-digit year format
-  bool realign_year = false;
   for (const DateTimeFormatToken& tok: dt_ctx.toks) {
     const char* tok_val = str + tok.pos + shift_len;
     if (tok.type == SEPARATOR) {
@@ -449,10 +483,10 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
       case YEAR: {
         dt_result->year = StringParser::StringToInt<int>(tok_val, tok_len, &status);
         if (UNLIKELY(StringParser::PARSE_SUCCESS != status)) return false;
-        if (UNLIKELY(dt_result->year < 1 || dt_result->year > 9999)) return false;
+        if (UNLIKELY(dt_result->year < 0 || dt_result->year > 9999)) return false;
         // Year in "Y" and "YY" format should be in the interval
         // [current time - 80 years, current time + 20 years)
-        if (tok_len <= 2) realign_year = true;
+        if (tok_len <= 2) dt_result->realign_year = true;
         break;
       }
       case MONTH_IN_YEAR: {
@@ -546,23 +580,6 @@ bool TimestampParser::ParseDateTime(const char* str, int str_len,
       default: DCHECK(false) << "Unknown date/time format token";
     }
   }
-  // Hive uses Java's SimpleDateFormat to parse timestamp:
-  // In SimpleDateFormat, the century for 2-digit-year breaks at current_time - 80 years.
-  // https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html
-  if (realign_year) {
-    DCHECK(!dt_ctx.century_break_ptime.is_special());
-    // Let the century start at AABB and the year parsed be YY, this gives us AAYY.
-    dt_result->year += (dt_ctx.century_break_ptime.date().year() / 100) * 100;
-    date parsed_date(dt_result->year, dt_result->month, dt_result->day);
-    time_duration parsed_time(dt_result->hour, dt_result->minute, dt_result->second,
-        dt_result->fraction);
-    // Advance 100 years if parsed time is before the century break
-    // For example if the century breaks at 1937 but dt_result->year = 1936,
-    // the correct year would be 2036.
-    if (boost::posix_time::ptime(parsed_date, parsed_time) < dt_ctx.century_break_ptime) {
-      dt_result->year += 100;
-    }
-  }
   return true;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fbdc8e3/be/src/runtime/timestamp-parse-util.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h
index b68cc85..bbcc03f 100644
--- a/be/src/runtime/timestamp-parse-util.h
+++ b/be/src/runtime/timestamp-parse-util.h
@@ -219,6 +219,16 @@ class TimestampParser {
   static bool ParseDateTime(const char* str, int str_len,
       const DateTimeFormatContext& dt_ctx, DateTimeParseResult* dt_result);
 
+  /// Helper function finding the correct century for 1 or 2 digit year according to
+  /// century break. Throws bad_year, bad_day_of_month, or bad_day_month if the date is
+  /// invalid. The century break behavior is copied from Java SimpleDateFormat in order to
+  /// be consistent with Hive.
+  /// In SimpleDateFormat, the century for 2-digit-year breaks at current_time - 80 years.
+  /// https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html
+  static boost::gregorian::date RealignYear(const DateTimeParseResult& dt_result,
+      const DateTimeFormatContext& dt_ctx, int day_offset,
+      const boost::posix_time::time_duration& t);
+
   /// Check if the string is a TimeZone offset token.
   /// Valid offset token format are 'hh:mm', 'hhmm', 'hh'.
   static bool IsValidTZOffset(const char* str_begin, const char* str_end);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fbdc8e3/be/src/runtime/timestamp-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-test.cc b/be/src/runtime/timestamp-test.cc
index 5f3e0e6..b8919cc 100644
--- a/be/src/runtime/timestamp-test.cc
+++ b/be/src/runtime/timestamp-test.cc
@@ -231,7 +231,8 @@ void TestTimestampTokens(vector<TimestampToken>* toks, int year, int month,
 
 TEST(TimestampTest, Basic) {
   // Fix current time to determine the behavior parsing 2-digit year format
-  TimestampValue now(date(2017, 7, 28), time_duration(16, 14, 24));
+  // Set it to 03/01 to test 02/29 edge cases.
+  TimestampValue now(date(1980, 3, 1), time_duration(16, 14, 24));
 
   char s1[] = "2012-01-20 01:10:01";
   char s2[] = "1990-10-20 10:10:10.123456789  ";
@@ -426,6 +427,19 @@ TEST(TimestampTest, Basic) {
   }
   // Test parsing/formatting of complex date/time formats
   vector<TimestampTC> test_cases = boost::assign::list_of
+    // Test year upper/lower bound
+    (TimestampTC("yyyy-MM-dd HH:mm:ss", "1400-01-01 00:00:00",
+        false, true, false, 1400, 1, 1))
+    (TimestampTC("yyyy-MM-dd HH:mm:ss", "1399-12-31 23:59:59",
+        false, true))
+    (TimestampTC("yyyy-MM-dd HH:mm:ss", "9999-12-31 23:59:59",
+        false, true, false, 9999, 12, 31, 23, 59, 59))
+    (TimestampTC("yyyy-MM-dd HH:mm:ss +hh", "1400-01-01 01:00:00 +01", false, true, false,
+        1400, 1, 1, 0, 0, 0))
+    (TimestampTC("yyyy-MM-dd HH:mm:ss +hh", "1400-01-01 01:00:00 +02", false, true))
+    (TimestampTC("yyyy-MM-dd HH:mm:ss +hh", "9999-12-31 22:00:00 -01", false, true, false,
+        9999, 12, 31, 23, 0, 0))
+    (TimestampTC("yyyy-MM-dd HH:mm:ss +hh", "9999-12-31 22:00:00 -02", false, true))
     // Test case on literal short months
     (TimestampTC("yyyy-MMM-dd", "2013-OCT-01", false, true, false, 2013, 10, 1))
     // Test case on literal short months
@@ -470,19 +484,32 @@ TEST(TimestampTest, Basic) {
     (TimestampTC("MMdd", "1201", false, true))
     // Test missing month
     (TimestampTC("yyyydd", "201301", false, true))
-    // Test missing month
-    (TimestampTC("yyyymm", "201301", false, true))
+    (TimestampTC("yydd", "1301", false, true))
+    // Test missing day
+    (TimestampTC("yyyyMM", "201301", false, true))
+    (TimestampTC("yyMM", "8512", false, true))
+    // Test missing month and day
+    (TimestampTC("yyyy", "2013", false, true))
+    (TimestampTC("yy", "13", false, true))
     // Test short year token
     (TimestampTC("y-MM-dd", "2013-11-13", false, true, false, 2013, 11, 13))
-    (TimestampTC("y-MM-dd", "13-11-13", false, true, false, 2013, 11, 13))
+    (TimestampTC("y-MM-dd", "13-11-13", false, true, false, 1913, 11, 13))
     // Test 2-digit year format
-    (TimestampTC("yy-MM-dd", "37-07-28", false, true, false, 2037, 7, 28))
-    (TimestampTC("yy-MM-dd", "37-07-29", false, true, false, 1937, 7, 29))
+    (TimestampTC("yy-MM-dd", "17-08-31", false, true, false, 1917, 8, 31))
+    (TimestampTC("yy-MM-dd", "99-08-31", false, true, false, 1999, 8, 31))
+    // Test 02/29 edge cases of 2-digit year format
+    (TimestampTC("yy-MM-dd", "00-02-28", false, true, false, 2000, 2, 28))
+    (TimestampTC("yy-MM-dd", "00-02-29", false, true, false, 2000, 2, 29))
+    (TimestampTC("yy-MM-dd", "00-03-01", false, true, false, 2000, 3, 1))
+    (TimestampTC("yy-MM-dd", "00-03-02", false, true, false, 1900, 3, 2))
+    (TimestampTC("yy-MM-dd", "04-02-29", false, true, false, 1904, 2, 29))
+    (TimestampTC("yy-MM-dd", "99-02-29", false, true))
     // Test 1-digit year format with time to show the exact boundary
-    (TimestampTC("y-MM-dd HH:mm:ss", "37-07-28 16:14:23", false, true, false,
-                 2037, 7, 28, 16, 14, 23))
-    (TimestampTC("y-MM-dd HH:mm:ss", "37-07-28 16:14:24", false, true, false,
-                 1937, 7, 28, 16, 14, 24))
+    // Before the cutoff. Year should be 2000
+    (TimestampTC("y-MM-dd HH:mm:ss", "00-02-29 16:14:23", false, true, false,
+        2000, 2, 29, 16, 14, 23))
+    // After the cutoff but 02/29/1900 is invalid
+    (TimestampTC("y-MM-dd HH:mm:ss", "00-02-29 16:14:24", false, true))
     // Test short month token
     (TimestampTC("yyyy-M-dd", "2013-11-13", false, true, false, 2013, 11, 13))
     (TimestampTC("yyyy-M-dd", "2013-1-13", false, true, false, 2013, 1, 13))
@@ -491,7 +518,7 @@ TEST(TimestampTest, Basic) {
     (TimestampTC("yyyy-MM-d", "2013-11-3", false, true, false, 2013, 11, 3))
     // Test short all date tokens
     (TimestampTC("y-M-d", "2013-11-13", false, true, false, 2013, 11, 13))
-    (TimestampTC("y-M-d", "13-1-3", false, true, false, 2013, 1, 3))
+    (TimestampTC("y-M-d", "13-1-3", false, true, false, 1913, 1, 3))
     // Test short hour token
     (TimestampTC("H:mm:ss", "14:24:34", false, false, true, 0, 0, 0, 14, 24, 34))
     (TimestampTC("H:mm:ss", "4:24:34", false, false, true, 0, 0, 0, 4, 24, 34))


[2/3] incubator-impala git commit: IMPALA-2107: [DOCS] Document base64*code() functions

Posted by mj...@apache.org.
IMPALA-2107: [DOCS] Document base64*code() functions

base64decode()
base64encode()

Change-Id: I5251e368ad36756c19a7b97e5ef6f232f616189b
Reviewed-on: http://gerrit.cloudera.org:8080/7963
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ac689131
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ac689131
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ac689131

Branch: refs/heads/master
Commit: ac689131190f5bf01a7c0a4892c30647139e7d32
Parents: 72b7e1c
Author: John Russell <jr...@cloudera.com>
Authored: Tue Aug 29 17:15:50 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 7 23:41:25 2017 +0000

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap             |   3 +
 docs/shared/impala_common.xml           | 117 +++++++++++++++++++++++++++
 docs/topics/impala_string_functions.xml |  53 ++++++++++++
 3 files changed, 173 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index cdcaed6..518afef 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -143,6 +143,9 @@ under the License.
   <keydef href="http://www.lzop.org/" scope="external" format="html" keys="lzop.org"/>
 
 <!-- Links to Wikipedia pages for background on industry terminology. -->
+  <keydef href="https://en.wikipedia.org/wiki/Base64" scope="external" format="html" keys="base64">
+    <topicmeta><linktext>Base64 article on Wikipedia</linktext></topicmeta>
+  </keydef>
   <keydef href="http://en.wikipedia.org/wiki/.htpasswd" scope="external" format="html" keys=".htpasswd"/>
   <keydef href="http://en.wikipedia.org/wiki/Coordinated_Universal_Time" scope="external" format="html" keys="Coordinated_Universal_Time"/>
   <keydef href="http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function" scope="external" format="html" keys="wiki_fnv"/>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index 5d79acc..9d6f72b 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -773,6 +773,123 @@ select concat('abc','mno','xyz');</codeblock>
         HBase tables.
       </p>
 
+      <p id="base64_charset">
+        The set of characters that can be generated as output
+        from <codeph>base64encode()</codeph>, or specified in
+        the argument string to <codeph>base64decode()</codeph>,
+        are the ASCII uppercase and lowercase letters (A-Z, a-z),
+        digits (0-9), and the punctuation characters
+        <codeph>+</codeph>, <codeph>/</codeph>, and <codeph>=</codeph>.
+      </p>
+
+      <p id="base64_error_handling">
+        If the argument string to <codeph>base64decode()</codeph> does
+        not represent a valid base64-encoded value, subject to the
+        constraints of the Impala implementation such as the allowed
+        character set, the function returns <codeph>NULL</codeph>.
+      </p>
+
+      <p id="base64_use_cases">
+        The functions <codeph>base64encode()</codeph> and
+        <codeph>base64decode()</codeph> are typically used
+        in combination, to store in an Impala table string data that is
+        problematic to store or transmit. For example, you could use
+        these functions to store string data that uses an encoding
+        other than UTF-8, or to transform the values in contexts that
+        require ASCII values, such as for partition key columns.
+        Keep in mind that base64-encoded values produce different results
+        for string functions such as <codeph>LENGTH()</codeph>,
+        <codeph>MAX()</codeph>, and <codeph>MIN()</codeph> than when
+        those functions are called with the unencoded string values.
+      </p>
+
+      <p id="base64_alignment">
+        All return values produced by <codeph>base64encode()</codeph>
+        are a multiple of 4 bytes in length. All argument values
+        supplied to <codeph>base64decode()</codeph> must also be a
+        multiple of 4 bytes in length. If a base64-encoded value
+        would otherwise have a different length, it can be padded
+        with trailing <codeph>=</codeph> characters to reach a length
+        that is a multiple of 4 bytes.
+      </p>
+
+      <p id="base64_examples">
+        The following examples show how to use <codeph>base64encode()</codeph>
+        and <codeph>base64decode()</codeph> together to store and retrieve
+        string values:
+<codeblock>
+-- An arbitrary string can be encoded in base 64.
+-- The length of the output is a multiple of 4 bytes,
+-- padded with trailing = characters if necessary.
+select base64encode('hello world') as encoded,
+  length(base64encode('hello world')) as length;
++------------------+--------+
+| encoded          | length |
++------------------+--------+
+| aGVsbG8gd29ybGQ= | 16     |
++------------------+--------+
+
+-- Passing an encoded value to base64decode() produces
+-- the original value.
+select base64decode('aGVsbG8gd29ybGQ=') as decoded;
++-------------+
+| decoded     |
++-------------+
+| hello world |
++-------------+
+</codeblock>
+
+      These examples demonstrate incorrect encoded values that
+      produce <codeph>NULL</codeph> return values when decoded:
+
+<codeblock>
+-- The input value to base64decode() must be a multiple of 4 bytes.
+-- In this case, leaving off the trailing = padding character
+-- produces a NULL return value.
+select base64decode('aGVsbG8gd29ybGQ') as decoded;
++---------+
+| decoded |
++---------+
+| NULL    |
++---------+
+WARNINGS: UDF WARNING: Invalid base64 string; input length is 15,
+  which is not a multiple of 4.
+
+-- The input to base64decode() can only contain certain characters.
+-- The $ character in this case causes a NULL return value.
+select base64decode('abc$');
++----------------------+
+| base64decode('abc$') |
++----------------------+
+| NULL                 |
++----------------------+
+WARNINGS: UDF WARNING: Could not base64 decode input in space 4; actual output length 0
+</codeblock>
+
+      These examples demonstrate <q>round-tripping</q> of an original string to an
+      encoded string, and back again. This technique is applicable if the original
+      source is in an unknown encoding, or if some intermediate processing stage
+      might cause national characters to be misrepresented:
+
+<codeblock>
+select 'circumflex accents: â, ê, î, ô, û' as original,
+  base64encode('circumflex accents: â, ê, î, ô, û') as encoded;
++-----------------------------------+------------------------------------------------------+
+| original                          | encoded                                              |
++-----------------------------------+------------------------------------------------------+
+| circumflex accents: â, ê, î, ô, û | Y2lyY3VtZmxleCBhY2NlbnRzOiDDoiwgw6osIMOuLCDDtCwgw7s= |
++-----------------------------------+------------------------------------------------------+
+
+select base64encode('circumflex accents: â, ê, î, ô, û') as encoded,
+  base64decode(base64encode('circumflex accents: â, ê, î, ô, û')) as decoded;
++------------------------------------------------------+-----------------------------------+
+| encoded                                              | decoded                           |
++------------------------------------------------------+-----------------------------------+
+| Y2lyY3VtZmxleCBhY2NlbnRzOiDDoiwgw6osIMOuLCDDtCwgw7s= | circumflex accents: â, ê, î, ô, û |
++------------------------------------------------------+-----------------------------------+
+</codeblock>
+      </p>
+
 <codeblock id="parquet_fallback_schema_resolution_example"><![CDATA[
 create database schema_evolution;
 use schema_evolution;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/topics/impala_string_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string_functions.xml b/docs/topics/impala_string_functions.xml
index 5758c52..36024f7 100644
--- a/docs/topics/impala_string_functions.xml
+++ b/docs/topics/impala_string_functions.xml
@@ -85,6 +85,59 @@ under the License.
 
       </dlentry>
 
+      <dlentry id="base64decode" rev="2.6.0 IMPALA-2107">
+
+        <dt>
+          <codeph>base64decode(string str)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="hidden">base64decode() function</indexterm>
+          <b>Purpose:</b>
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p>
+            For general information about Base64 encoding, see
+            <xref keyref="base64"/>.
+          </p>
+          <p conref="../shared/impala_common.xml#common/base64_use_cases"/>
+          <p conref="../shared/impala_common.xml#common/base64_charset"/>
+          <p conref="../shared/impala_common.xml#common/base64_alignment"/>
+          <p conref="../shared/impala_common.xml#common/base64_error_handling"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p conref="../shared/impala_common.xml#common/base64_examples"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="base64encode" rev="2.6.0 IMPALA-2107">
+
+        <dt>
+          <codeph>base64encode(string str)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="hidden">base64encode() function</indexterm>
+          <b>Purpose:</b>
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p>
+            For general information about Base64 encoding, see
+            <xref keyref="base64"/>.
+          </p>
+          <p conref="../shared/impala_common.xml#common/base64_use_cases"/>
+          <p conref="../shared/impala_common.xml#common/base64_charset"/>
+          <p conref="../shared/impala_common.xml#common/base64_alignment"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p conref="../shared/impala_common.xml#common/base64_examples"/>
+        </dd>
+
+      </dlentry>
+
       <dlentry rev="2.3.0" id="btrim">
 
         <dt>