You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/09/08 16:50:48 UTC

[2/3] incubator-impala git commit: IMPALA-2107: [DOCS] Document base64*code() functions

IMPALA-2107: [DOCS] Document base64*code() functions

base64decode()
base64encode()

Change-Id: I5251e368ad36756c19a7b97e5ef6f232f616189b
Reviewed-on: http://gerrit.cloudera.org:8080/7963
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ac689131
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ac689131
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ac689131

Branch: refs/heads/master
Commit: ac689131190f5bf01a7c0a4892c30647139e7d32
Parents: 72b7e1c
Author: John Russell <jr...@cloudera.com>
Authored: Tue Aug 29 17:15:50 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Sep 7 23:41:25 2017 +0000

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap             |   3 +
 docs/shared/impala_common.xml           | 117 +++++++++++++++++++++++++++
 docs/topics/impala_string_functions.xml |  53 ++++++++++++
 3 files changed, 173 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index cdcaed6..518afef 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -143,6 +143,9 @@ under the License.
   <keydef href="http://www.lzop.org/" scope="external" format="html" keys="lzop.org"/>
 
 <!-- Links to Wikipedia pages for background on industry terminology. -->
+  <keydef href="https://en.wikipedia.org/wiki/Base64" scope="external" format="html" keys="base64">
+    <topicmeta><linktext>Base64 article on Wikipedia</linktext></topicmeta>
+  </keydef>
   <keydef href="http://en.wikipedia.org/wiki/.htpasswd" scope="external" format="html" keys=".htpasswd"/>
   <keydef href="http://en.wikipedia.org/wiki/Coordinated_Universal_Time" scope="external" format="html" keys="Coordinated_Universal_Time"/>
   <keydef href="http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function" scope="external" format="html" keys="wiki_fnv"/>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index 5d79acc..9d6f72b 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -773,6 +773,123 @@ select concat('abc','mno','xyz');</codeblock>
         HBase tables.
       </p>
 
+      <p id="base64_charset">
+        The set of characters that can be generated as output
+        from <codeph>base64encode()</codeph>, or specified in
+        the argument string to <codeph>base64decode()</codeph>,
+        consists of the ASCII uppercase and lowercase letters (A-Z, a-z),
+        digits (0-9), and the punctuation characters
+        <codeph>+</codeph>, <codeph>/</codeph>, and <codeph>=</codeph>.
+      </p>
+
+      <p id="base64_error_handling">
+        If the argument string to <codeph>base64decode()</codeph> does
+        not represent a valid base64-encoded value, subject to the
+        constraints of the Impala implementation such as the allowed
+        character set, the function returns <codeph>NULL</codeph>.
+      </p>
+
+      <p id="base64_use_cases">
+        The functions <codeph>base64encode()</codeph> and
+        <codeph>base64decode()</codeph> are typically used
+        in combination, to store in an Impala table string data that is
+        problematic to store or transmit. For example, you could use
+        these functions to store string data that uses an encoding
+        other than UTF-8, or to transform the values in contexts that
+        require ASCII values, such as for partition key columns.
+        Keep in mind that base64-encoded values produce different results
+        for functions such as <codeph>LENGTH()</codeph>,
+        <codeph>MAX()</codeph>, and <codeph>MIN()</codeph> than when
+        those functions are called with the unencoded string values.
+      </p>
+
+      <p id="base64_alignment">
+        All return values produced by <codeph>base64encode()</codeph>
+        are a multiple of 4 bytes in length. All argument values
+        supplied to <codeph>base64decode()</codeph> must also be a
+        multiple of 4 bytes in length. If a base64-encoded value
+        would otherwise have a different length, it can be padded
+        with trailing <codeph>=</codeph> characters to reach a length
+        that is a multiple of 4 bytes.
+      </p>
+
+      <p id="base64_examples">
+        The following examples show how to use <codeph>base64encode()</codeph>
+        and <codeph>base64decode()</codeph> together to store and retrieve
+        string values:
+<codeblock>
+-- An arbitrary string can be encoded in base 64.
+-- The length of the output is a multiple of 4 bytes,
+-- padded with trailing = characters if necessary.
+select base64encode('hello world') as encoded,
+  length(base64encode('hello world')) as length;
++------------------+--------+
+| encoded          | length |
++------------------+--------+
+| aGVsbG8gd29ybGQ= | 16     |
++------------------+--------+
+
+-- Passing an encoded value to base64decode() produces
+-- the original value.
+select base64decode('aGVsbG8gd29ybGQ=') as decoded;
++-------------+
+| decoded     |
++-------------+
+| hello world |
++-------------+
+</codeblock>
+
+      These examples demonstrate incorrect encoded values that
+      produce <codeph>NULL</codeph> return values when decoded:
+
+<codeblock>
+-- The input value to base64decode() must be a multiple of 4 bytes.
+-- In this case, leaving off the trailing = padding character
+-- produces a NULL return value.
+select base64decode('aGVsbG8gd29ybGQ') as decoded;
++---------+
+| decoded |
++---------+
+| NULL    |
++---------+
+WARNINGS: UDF WARNING: Invalid base64 string; input length is 15,
+  which is not a multiple of 4.
+
+-- The input to base64decode() can only contain certain characters.
+-- The $ character in this case causes a NULL return value.
+select base64decode('abc$');
++----------------------+
+| base64decode('abc$') |
++----------------------+
+| NULL                 |
++----------------------+
+WARNINGS: UDF WARNING: Could not base64 decode input in space 4; actual output length 0
+</codeblock>
+
+      These examples demonstrate <q>round-tripping</q> of an original string to an
+      encoded string, and back again. This technique is applicable if the original
+      source is in an unknown encoding, or if some intermediate processing stage
+      might cause national characters to be misrepresented:
+
+<codeblock>
+select 'circumflex accents: â, ê, î, ô, û' as original,
+  base64encode('circumflex accents: â, ê, î, ô, û') as encoded;
++-----------------------------------+------------------------------------------------------+
+| original                          | encoded                                              |
++-----------------------------------+------------------------------------------------------+
+| circumflex accents: â, ê, î, ô, û | Y2lyY3VtZmxleCBhY2NlbnRzOiDDoiwgw6osIMOuLCDDtCwgw7s= |
++-----------------------------------+------------------------------------------------------+
+
+select base64encode('circumflex accents: â, ê, î, ô, û') as encoded,
+  base64decode(base64encode('circumflex accents: â, ê, î, ô, û')) as decoded;
++------------------------------------------------------+-----------------------------------+
+| encoded                                              | decoded                           |
++------------------------------------------------------+-----------------------------------+
+| Y2lyY3VtZmxleCBhY2NlbnRzOiDDoiwgw6osIMOuLCDDtCwgw7s= | circumflex accents: â, ê, î, ô, û |
++------------------------------------------------------+-----------------------------------+
+</codeblock>
+      </p>
+
 <codeblock id="parquet_fallback_schema_resolution_example"><![CDATA[
 create database schema_evolution;
 use schema_evolution;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ac689131/docs/topics/impala_string_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_string_functions.xml b/docs/topics/impala_string_functions.xml
index 5758c52..36024f7 100644
--- a/docs/topics/impala_string_functions.xml
+++ b/docs/topics/impala_string_functions.xml
@@ -85,6 +85,59 @@ under the License.
 
       </dlentry>
 
+      <dlentry id="base64decode" rev="2.6.0 IMPALA-2107">
+
+        <dt>
+          <codeph>base64decode(string str)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="hidden">base64decode() function</indexterm>
+          <b>Purpose:</b> Decodes a base64-encoded string, returning the original string value.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p>
+            For general information about Base64 encoding, see
+            <xref keyref="base64"/>.
+          </p>
+          <p conref="../shared/impala_common.xml#common/base64_use_cases"/>
+          <p conref="../shared/impala_common.xml#common/base64_charset"/>
+          <p conref="../shared/impala_common.xml#common/base64_alignment"/>
+          <p conref="../shared/impala_common.xml#common/base64_error_handling"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p conref="../shared/impala_common.xml#common/base64_examples"/>
+        </dd>
+
+      </dlentry>
+
+      <dlentry id="base64encode" rev="2.6.0 IMPALA-2107">
+
+        <dt>
+          <codeph>base64encode(string str)</codeph>
+        </dt>
+
+        <dd>
+          <indexterm audience="hidden">base64encode() function</indexterm>
+          <b>Purpose:</b> Encodes a string as a base64 value that contains only ASCII characters.
+          <p>
+            <b>Return type:</b> <codeph>string</codeph>
+          </p>
+          <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+          <p>
+            For general information about Base64 encoding, see
+            <xref keyref="base64"/>.
+          </p>
+          <p conref="../shared/impala_common.xml#common/base64_use_cases"/>
+          <p conref="../shared/impala_common.xml#common/base64_charset"/>
+          <p conref="../shared/impala_common.xml#common/base64_alignment"/>
+          <p conref="../shared/impala_common.xml#common/example_blurb"/>
+          <p conref="../shared/impala_common.xml#common/base64_examples"/>
+        </dd>
+
+      </dlentry>
+
       <dlentry rev="2.3.0" id="btrim">
 
         <dt>