You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/09/08 16:50:48 UTC
[2/3] incubator-impala git commit: IMPALA-2107: [DOCS] Document
base64*code() functions
IMPALA-2107: [DOCS] Document base64*code() functions
base64decode()
base64encode()
Change-Id: I5251e368ad36756c19a7b97e5ef6f232f616189b
Reviewed-on: http://gerrit.cloudera.org:8080/7963
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ac689131
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ac689131
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ac689131
Branch: refs/heads/master
Commit: ac689131190f5bf01a7c0a4892c30647139e7d32
Parents: 72b7e1c
Author: John Russell <jr...@cloudera.com>
Authored: Tue Aug 29 17:15:50 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 7 23:41:25 2017 +0000
----------------------------------------------------------------------
docs/impala_keydefs.ditamap | 3 +
docs/shared/impala_common.xml | 117 +++++++++++++++++++++++++++
docs/topics/impala_string_functions.xml | 53 ++++++++++++
3 files changed, 173 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index cdcaed6..518afef 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -143,6 +143,9 @@ under the License.
<keydef href="http://www.lzop.org/" scope="external" format="html" keys="lzop.org"/>
<!-- Links to Wikipedia pages for background on industry terminology. -->
+ <keydef href="https://en.wikipedia.org/wiki/Base64" scope="external" format="html" keys="base64">
+ <topicmeta><linktext>Base64 article on Wikipedia</linktext></topicmeta>
+ </keydef>
<keydef href="http://en.wikipedia.org/wiki/.htpasswd" scope="external" format="html" keys=".htpasswd"/>
<keydef href="http://en.wikipedia.org/wiki/Coordinated_Universal_Time" scope="external" format="html" keys="Coordinated_Universal_Time"/>
<keydef href="http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function" scope="external" format="html" keys="wiki_fnv"/>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index 5d79acc..9d6f72b 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -773,6 +773,123 @@ select concat('abc','mno','xyz');</codeblock>
HBase tables.
</p>
+ <p id="base64_charset">
+ The characters that can be generated as output
+ from <codeph>base64encode()</codeph>, or specified in
+ the argument string to <codeph>base64decode()</codeph>,
+ are the ASCII uppercase and lowercase letters (A-Z, a-z),
+ digits (0-9), and the punctuation characters
+ <codeph>+</codeph>, <codeph>/</codeph>, and <codeph>=</codeph>.
+ </p>
+
+ <p id="base64_error_handling">
+ If the argument string to <codeph>base64decode()</codeph> does
+ not represent a valid base64-encoded value, subject to the
+ constraints of the Impala implementation such as the allowed
+ character set, the function returns <codeph>NULL</codeph>.
+ </p>
+
+ <p id="base64_use_cases">
+ The functions <codeph>base64encode()</codeph> and
+ <codeph>base64decode()</codeph> are typically used
+ in combination, to store string data in an Impala table when that data is
+ problematic to store or transmit. For example, you could use
+ these functions to store string data that uses an encoding
+ other than UTF-8, or to transform the values in contexts that
+ require ASCII values, such as for partition key columns.
+ Keep in mind that base64-encoded values produce different results
+ for functions such as <codeph>LENGTH()</codeph>,
+ <codeph>MAX()</codeph>, and <codeph>MIN()</codeph> than when
+ those functions are called with the unencoded string values.
+ </p>
+
+ <p id="base64_alignment">
+ All return values produced by <codeph>base64encode()</codeph>
+ are a multiple of 4 bytes in length. All argument values
+ supplied to <codeph>base64decode()</codeph> must also be a
+ multiple of 4 bytes in length. If a base64-encoded value
+ would otherwise have a different length, it can be padded
+ with trailing <codeph>=</codeph> characters to reach a length
+ that is a multiple of 4 bytes.
+ </p>
+
+ <p id="base64_examples">
+ The following examples show how to use <codeph>base64encode()</codeph>
+ and <codeph>base64decode()</codeph> together to store and retrieve
+ string values:
+<codeblock>
+-- An arbitrary string can be encoded in base64.
+-- The length of the output is a multiple of 4 bytes,
+-- padded with trailing = characters if necessary.
+select base64encode('hello world') as encoded,
+ length(base64encode('hello world')) as length;
++------------------+--------+
+| encoded | length |
++------------------+--------+
+| aGVsbG8gd29ybGQ= | 16 |
++------------------+--------+
+
+-- Passing an encoded value to base64decode() produces
+-- the original value.
+select base64decode('aGVsbG8gd29ybGQ=') as decoded;
++-------------+
+| decoded |
++-------------+
+| hello world |
++-------------+
+</codeblock>
+
+ These examples demonstrate incorrect encoded values that
+ produce <codeph>NULL</codeph> return values when decoded:
+
+<codeblock>
+-- The input value to base64decode() must be a multiple of 4 bytes.
+-- In this case, leaving off the trailing = padding character
+-- produces a NULL return value.
+select base64decode('aGVsbG8gd29ybGQ') as decoded;
++---------+
+| decoded |
++---------+
+| NULL |
++---------+
+WARNINGS: UDF WARNING: Invalid base64 string; input length is 15,
+ which is not a multiple of 4.
+
+-- The input to base64decode() can only contain certain characters.
+-- The $ character in this case causes a NULL return value.
+select base64decode('abc$');
++----------------------+
+| base64decode('abc$') |
++----------------------+
+| NULL |
++----------------------+
+WARNINGS: UDF WARNING: Could not base64 decode input in space 4; actual output length 0
+</codeblock>
+
+ These examples demonstrate <q>round-tripping</q> of an original string to an
+ encoded string, and back again. This technique is applicable if the original
+ source is in an unknown encoding, or if some intermediate processing stage
+ might cause national characters to be misrepresented:
+
+<codeblock>
+select 'circumflex accents: â, ê, î, ô, û' as original,
+ base64encode('circumflex accents: â, ê, î, ô, û') as encoded;
++-----------------------------------+------------------------------------------------------+
+| original | encoded |
++-----------------------------------+------------------------------------------------------+
+| circumflex accents: â, ê, î, ô, û | Y2lyY3VtZmxleCBhY2NlbnRzOiDDoiwgw6osIMOuLCDDtCwgw7s= |
++-----------------------------------+------------------------------------------------------+
+
+select base64encode('circumflex accents: â, ê, î, ô, û') as encoded,
+ base64decode(base64encode('circumflex accents: â, ê, î, ô, û')) as decoded;
++------------------------------------------------------+-----------------------------------+
+| encoded | decoded |
++------------------------------------------------------+-----------------------------------+
+| Y2lyY3VtZmxleCBhY2NlbnRzOiDDoiwgw6osIMOuLCDDtCwgw7s= | circumflex accents: â, ê, î, ô, û |
++------------------------------------------------------+-----------------------------------+
+</codeblock>
+ </p>
+
<codeblock id="parquet_fallback_schema_resolution_example"><![CDATA[
create database schema_evolution;
use schema_evolution;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/topics/impala_string_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string_functions.xml b/docs/topics/impala_string_functions.xml
index 5758c52..36024f7 100644
--- a/docs/topics/impala_string_functions.xml
+++ b/docs/topics/impala_string_functions.xml
@@ -85,6 +85,59 @@ under the License.
</dlentry>
+ <dlentry id="base64decode" rev="2.6.0 IMPALA-2107">
+
+ <dt>
+ <codeph>base64decode(string str)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="hidden">base64decode() function</indexterm>
+ <b>Purpose:</b> Decodes a base64-encoded string, returning the original string value.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ For general information about Base64 encoding, see
+ <xref keyref="base64"/>.
+ </p>
+ <p conref="../shared/impala_common.xml#common/base64_use_cases"/>
+ <p conref="../shared/impala_common.xml#common/base64_charset"/>
+ <p conref="../shared/impala_common.xml#common/base64_alignment"/>
+ <p conref="../shared/impala_common.xml#common/base64_error_handling"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p conref="../shared/impala_common.xml#common/base64_examples"/>
+ </dd>
+
+ </dlentry>
+
+ <dlentry id="base64encode" rev="2.6.0 IMPALA-2107">
+
+ <dt>
+ <codeph>base64encode(string str)</codeph>
+ </dt>
+
+ <dd>
+ <indexterm audience="hidden">base64encode() function</indexterm>
+ <b>Purpose:</b> Encodes a string in base64 format, returning the encoded string value.
+ <p>
+ <b>Return type:</b> <codeph>string</codeph>
+ </p>
+ <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+ <p>
+ For general information about Base64 encoding, see
+ <xref keyref="base64"/>.
+ </p>
+ <p conref="../shared/impala_common.xml#common/base64_use_cases"/>
+ <p conref="../shared/impala_common.xml#common/base64_charset"/>
+ <p conref="../shared/impala_common.xml#common/base64_alignment"/>
+ <p conref="../shared/impala_common.xml#common/example_blurb"/>
+ <p conref="../shared/impala_common.xml#common/base64_examples"/>
+ </dd>
+
+ </dlentry>
+
<dlentry rev="2.3.0" id="btrim">
<dt>