You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ar...@apache.org on 2019/07/25 17:45:22 UTC

[impala] 04/04: IMPALA-8641: Document compression codec zstd in Parquet

This is an automated email from the ASF dual-hosted git repository.

arodoni pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6d4cde7c926e0c002f976697d8437165cca4c247
Author: Abhishek <ar...@cloudera.com>
AuthorDate: Sat Jun 8 09:06:20 2019 -0700

    IMPALA-8641: Document compression codec zstd in Parquet
    
    Updated the documentation for query_option compression_codec to also
    include zstd.
    
    Change-Id: Id55dbc3297ec1560b04e4da2c93cc1aeb1e6fb2f
    Reviewed-on: http://gerrit.cloudera.org:8080/13910
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Alex Rodoni <ar...@cloudera.com>
---
 docs/topics/impala_compression_codec.xml | 23 ++++++++++++++++++++---
 docs/topics/impala_parquet.xml           |  8 ++++----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/docs/topics/impala_compression_codec.xml b/docs/topics/impala_compression_codec.xml
index 07caad3..8808bb7 100644
--- a/docs/topics/impala_compression_codec.xml
+++ b/docs/topics/impala_compression_codec.xml
@@ -31,6 +31,7 @@ under the License.
       <data name="Category" value="Parquet"/>
       <data name="Category" value="Snappy"/>
       <data name="Category" value="Gzip"/>
+      <data name="Category" value="Zstd"/>
       <data name="Category" value="Developers"/>
       <data name="Category" value="Data Analysts"/>
     </metadata>
@@ -56,11 +57,20 @@ under the License.
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
 
-<codeblock>SET COMPRESSION_CODEC=<varname>codec_name</varname>;</codeblock>
+<codeblock>SET COMPRESSION_CODEC=<varname>codec_name</varname>; -- Supported for all codecs.
+SET COMPRESSION_CODEC=<varname>codec_name</varname>:<varname>compression_level</varname>; -- Only supported for ZSTD.
+</codeblock>
 
     <p>
       The allowed values for this query option are <codeph>SNAPPY</codeph> (the default), <codeph>GZIP</codeph>,
-      and <codeph>NONE</codeph>.
+      <codeph>ZSTD</codeph>, and <codeph>NONE</codeph>.
+    </p>
+
+    <p>
+      <codeph>ZSTD</codeph> also supports setting a compression level. Lower levels compress faster but
+      produce a lower compression ratio. Compression levels from 1 up to 22 are supported for <codeph>ZSTD</codeph>.
+      If no level is specified in the <codeph>compression_codec</codeph> query option, the default
+      compression level 3 is used.
     </p>
 
     <note>
@@ -92,7 +102,14 @@ under the License.
 
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
 
-<codeblock>set compression_codec=gzip;
+<codeblock>
+set compression_codec=zstd; -- Default compression level 3.
+insert into parquet_table_zstd_default_compressed select * from t1;
+
+set compression_codec=zstd:12; -- Compression level 12.
+insert into parquet_table_zstd_highly_compressed select * from t1;
+
+set compression_codec=gzip;
 insert into parquet_table_highly_compressed select * from t1;
 
 set compression_codec=snappy;
diff --git a/docs/topics/impala_parquet.xml b/docs/topics/impala_parquet.xml
index 9a6b01b..1a19841 100644
--- a/docs/topics/impala_parquet.xml
+++ b/docs/topics/impala_parquet.xml
@@ -449,10 +449,10 @@ under the License.
         underlying compression is controlled by the <codeph>COMPRESSION_CODEC</codeph> query
         option. (Prior to Impala 2.0, the query option name was
         <codeph>PARQUET_COMPRESSION_CODEC</codeph>.) The allowed values for this query option
-        are <codeph>snappy</codeph> (the default), <codeph>gzip</codeph>, and
-        <codeph>none</codeph>. The option value is not case-sensitive. If the option is set to
-        an unrecognized value, all kinds of queries will fail due to the invalid option setting,
-        not just queries involving Parquet tables.
+        are <codeph>snappy</codeph> (the default), <codeph>gzip</codeph>, <codeph>zstd</codeph>,
+        and <codeph>none</codeph>. The option value is not case-sensitive. If the option is set
+        to an unrecognized value, all kinds of queries will fail due to the invalid option
+        setting, not just queries involving Parquet tables.
       </p>
 
     </conbody>