You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2022/12/13 13:30:51 UTC

[impala] branch master updated (da304c1fe -> b88cfadbb)

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from da304c1fe IMPALA-11784: Don't call Iceberg's planFiles redundantly during table load
     new 7f43afc27 IMPALA-11791: [DOCS] Document IMPALA-9499 query option
     new b88cfadbb IMPALA-11777: Bump CDP_BUILD_NUMBER to get HIVE-24498

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 bin/impala-config.sh                               | 22 +++---
 docs/impala.ditamap                                |  1 +
 docs/topics/impala_complex_types.xml               | 89 ++++++++++++++++++----
 ...f8_mode.xml => impala_expand_complex_types.xml} | 34 +++++----
 fe/pom.xml                                         | 16 ++++
 java/executor-deps/pom.xml                         |  8 ++
 6 files changed, 129 insertions(+), 41 deletions(-)
 copy docs/topics/{impala_utf8_mode.xml => impala_expand_complex_types.xml} (57%)


[impala] 02/02: IMPALA-11777: Bump CDP_BUILD_NUMBER to get HIVE-24498

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b88cfadbbde45ccbe5e4b6d644d046a9a275e31c
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Thu Dec 8 21:00:42 2022 +0100

    IMPALA-11777: Bump CDP_BUILD_NUMBER to get HIVE-24498
    
    Without HIVE-24498 we get java.lang.NoClassDefFoundError exceptions
    when we write Iceberg tables via Hive. This makes it hard to write
    interop tests between Hive and Impala which use Iceberg tables.
    
    I also exclude some private Java components to get things built.
    
    Change-Id: I486c2b1b224f72e082e331a57cf25a37ebb9fa54
    Reviewed-on: http://gerrit.cloudera.org:8080/19331
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Laszlo Gaal <la...@cloudera.com>
    Reviewed-by: Tamas Mate <tm...@apache.org>
---
 bin/impala-config.sh       | 22 +++++++++++-----------
 fe/pom.xml                 | 16 ++++++++++++++++
 java/executor-deps/pom.xml |  8 ++++++++
 3 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 6d8c369cb..88189bb83 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -198,19 +198,19 @@ fi
 : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
 export IMPALA_TOOLCHAIN_HOST
 
-export CDP_BUILD_NUMBER=33375775
+export CDP_BUILD_NUMBER=34919320
 export CDP_MAVEN_REPOSITORY=\
 "https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven"
-export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.16.0-233
-export CDP_HADOOP_VERSION=3.1.1.7.2.16.0-233
-export CDP_HBASE_VERSION=2.4.6.7.2.16.0-233
-export CDP_HIVE_VERSION=3.1.3000.7.2.16.0-233
-export CDP_ICEBERG_VERSION=0.14.1.7.2.16.0-233
-export CDP_KNOX_VERSION=1.3.0.7.2.16.0-233
-export CDP_OZONE_VERSION=1.3.0.7.2.16.0-233
-export CDP_PARQUET_VERSION=1.10.99.7.2.16.0-233
-export CDP_RANGER_VERSION=2.3.0.7.2.16.0-233
-export CDP_TEZ_VERSION=0.9.1.7.2.16.0-233
+export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.16.0-272
+export CDP_HADOOP_VERSION=3.1.1.7.2.16.0-272
+export CDP_HBASE_VERSION=2.4.6.7.2.16.0-272
+export CDP_HIVE_VERSION=3.1.3000.7.2.16.0-272
+export CDP_ICEBERG_VERSION=0.14.1.7.2.16.0-272
+export CDP_KNOX_VERSION=1.3.0.7.2.16.0-272
+export CDP_OZONE_VERSION=1.3.0.7.2.16.0-272
+export CDP_PARQUET_VERSION=1.10.99.7.2.16.0-272
+export CDP_RANGER_VERSION=2.3.0.7.2.16.0-272
+export CDP_TEZ_VERSION=0.9.1.7.2.16.0-272
 
 # Ref: https://infra.apache.org/release-download-pages.html#closer
 : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"}
diff --git a/fe/pom.xml b/fe/pom.xml
index 09b96dd18..9909ec11a 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -134,6 +134,14 @@ under the License.
         <!-- We need to exclude ranger-plugins-audit here, or the build will fail
         to resolve dependencies for cruise-control-metrics-reporter, which is
         unneeded and not uploaded to toolchain. -->
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>jwtprovider-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>jwtprovider-knox</artifactId>
+        </exclusion>
         <exclusion>
           <groupId>org.apache.ranger</groupId>
           <artifactId>ranger-plugins-audit</artifactId>
@@ -160,6 +168,14 @@ under the License.
         <!-- Ranger jar also includes solr artifacts which we do not exclude here since
         they are used during the instantiation of RangerAuthorizationFactory
         (See IMPALA-10644 for more details). -->
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>jwtprovider-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>jwtprovider-knox</artifactId>
+        </exclusion>
         <exclusion>
           <groupId>org.apache.kafka</groupId>
           <artifactId>*</artifactId>
diff --git a/java/executor-deps/pom.xml b/java/executor-deps/pom.xml
index 8fee73e96..428b78145 100644
--- a/java/executor-deps/pom.xml
+++ b/java/executor-deps/pom.xml
@@ -96,6 +96,14 @@ under the License.
           <groupId>com.amazonaws</groupId>
           <artifactId>aws-java-sdk-bundle</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>jwtprovider-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>jwtprovider-knox</artifactId>
+        </exclusion>
         <exclusion>
           <groupId>org.eclipse.jetty</groupId>
           <artifactId>*</artifactId>


[impala] 01/02: IMPALA-11791: [DOCS] Document IMPALA-9499 query option

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 7f43afc2774ae2b363289b9958ff70e215efb5e6
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Mon Dec 12 18:07:21 2022 +0100

    IMPALA-11791: [DOCS] Document IMPALA-9499 query option
    
    IMPALA-9499 introduced the EXPAND_COMPLEX_TYPES query option which is
    documented in this change.
    
    Also updates docs/topics/impala_complex_types.xml - complex types are
    now allowed in the select list with the exceptions of collections
    embedded in structs and structs embedded in collections.
    
    Change-Id: I1f0a6b402de1ed9bb6aa05987a6ff8e6d62accb5
    Reviewed-on: http://gerrit.cloudera.org:8080/19348
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
---
 docs/impala.ditamap                         |  1 +
 docs/topics/impala_complex_types.xml        | 89 +++++++++++++++++++++++------
 docs/topics/impala_expand_complex_types.xml | 61 ++++++++++++++++++++
 3 files changed, 135 insertions(+), 16 deletions(-)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 7eac4c329..b8c3dad72 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -257,6 +257,7 @@ under the License.
           <topicref href="topics/impala_timezone.xml"/>
           <topicref href="topics/impala_topn_bytes_limit.xml"/>
           <topicref href="topics/impala_utf8_mode.xml"/>
+          <topicref href="topics/impala_expand_complex_types.xml"/>
         </topicref>
       </topicref>
       <topicref href="topics/impala_show.xml"/>
diff --git a/docs/topics/impala_complex_types.xml b/docs/topics/impala_complex_types.xml
index ba022f50d..9432f3b07 100644
--- a/docs/topics/impala_complex_types.xml
+++ b/docs/topics/impala_complex_types.xml
@@ -692,9 +692,9 @@ under the License.
         </p>
 
         <p>
-          Because the complex columns are omitted from the result set of an Impala <codeph>SELECT *</codeph> or <codeph>SELECT
-          <varname>col_name</varname></codeph> query, and because Impala currently does not support writing Parquet files with complex type
-          columns, you cannot use the <codeph>CREATE TABLE AS SELECT</codeph> syntax to create a table with nested type columns.
+          Because Impala currently does not support writing Parquet files with complex type columns,
+          you cannot use the <codeph>CREATE TABLE AS SELECT</codeph> syntax to create a table with
+          nested type columns.
         </p>
 
         <note>
@@ -971,29 +971,86 @@ STORED AS PARQUET;
 
 <!-- Hive does the JSON output business: http://www.datascience-labs.com/hive/hiveql-data-manipulation/ -->
 
-<!-- SELECT * works but skips any nested type coloumns. -->
+          <p>
+            The result set of an Impala query can contain both scalar and complex types. A query can
+            either retrieve the complex values directly or <q>unpack</q> the elements and fields
+            within a complex type using join queries, with the limitation that direct retrieval is
+            currently not supported for complex types where collections (maps or arrays) are
+            embedded within structs or structs are embedded within collections.
+          </p>
+
+          <p>
+            Here are some complex types that are supported in the select list:
+            <ul>
+              <li>
+                <p>
+                  <codeph>STRUCT&lt;i: INT&gt;</codeph>
+                </p>
+              </li>
+              <li>
+                <p>
+                  <codeph>STRUCT&lt;s: STRUCT&lt;i: INT&gt;&gt;</codeph>
+                </p>
+              </li>
+              <li>
+                <p>
+                  <codeph>ARRAY&lt;INT&gt;</codeph>
+                </p>
+              </li>
+              <li>
+                <p>
+                  <codeph>ARRAY&lt;ARRAY&lt;INT&gt;&gt;</codeph>
+                </p>
+              </li>
+              <li>
+                <p>
+                  <codeph>ARRAY&lt;MAP&lt;INT, STRING&gt;&gt;</codeph>
+                </p>
+              </li>
+            </ul>
+          </p>
 
           <p>
-            The result set of an Impala query always contains all scalar types; the elements and fields within any complex type queries must
-            be <q>unpacked</q> using join queries. A query cannot directly retrieve the entire value for a complex type column. Impala
-            returns an error in this case. Queries using <codeph>SELECT *</codeph> are allowed for tables with complex types, but the
-            columns with complex types are skipped.
+            And here are some that are not supported in the select list:
+            <ul>
+              <li>
+                <p>
+                  <codeph>STRUCT&lt;a: ARRAY&lt;INT&gt;&gt;</codeph>
+                </p>
+              </li>
+              <li>
+                <p>
+                  <codeph>ARRAY&lt;STRUCT&lt;i: INT&gt;&gt;</codeph>
+                </p>
+              </li>
+              <li>
+                <p>
+                  <codeph>MAP&lt;INT, STRUCT&lt;s: STRING&gt;&gt;</codeph>
+                </p>
+              </li>
+            </ul>
           </p>
 
           <p>
-            The following example shows how referring directly to a complex type column returns an error, while <codeph>SELECT *</codeph> on
-            the same table succeeds, but only retrieves the scalar columns.
+            Because of backward compatibility with earlier versions of Impala that did not support
+            complex types in the result set, queries using <codeph>SELECT *</codeph> skip complex
+            types by default. To include complex types in <codeph>SELECT *</codeph> queries, set the
+            EXPAND_COMPLEX_TYPES query option to true (see
+            <xref href="impala_expand_complex_types.xml"/>).
           </p>
 
-          <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/>
+          <p>
+            The following example shows how referring directly to a column with a complex type where
+            a struct is embedded in a collection (an array) returns an error, while
+            <codeph>SELECT *</codeph> on the same table succeeds, but only retrieves the scalar
+            columns. Note that if EXPAND_COMPLEX_TYPES is true, the <codeph>SELECT *</codeph> query
+            also fails with the same error.
+          </p>
 
-<!-- Original error message:
-ERROR: AnalysisException: Expr 'c_orders' in select list returns a complex type 'ARRAY<STRUCT<o_orderkey:BIGINT,o_orderstatus:STRING,o_totalprice:DECIMAL(12,2),o_orderdate:STRING,o_orderpriority:STRING,o_clerk:STRING,o_shippriority:INT,o_comment:STRING,o_lineitems:ARRAY<STRUCT<l_partkey:BIGINT,l_suppkey:BIGINT,l_linenumber:INT,l_quantity:DECIMAL(12,2),l_extendedprice:DECIMAL(12,2),l_discount:DECIMAL(12,2),l_tax:DECIMAL(12,2),l_returnflag:STRING,l_linestatus:STRING,l_shipdate:STRING,l_com [...]
--->
+          <note conref="../shared/impala_common.xml#common/complex_type_schema_pointer"/>
 
 <codeblock><![CDATA[SELECT c_orders FROM customer LIMIT 1;
-ERROR: AnalysisException: Expr 'c_orders' in select list returns a complex type 'ARRAY<STRUCT<o_orderkey:BIGINT,o_orderstatus:STRING, ... l_receiptdate:STRING,l_shipinstruct:STRING,l_shipmode:STRING,l_comment:STRING>>>>'.
-Only scalar types are allowed in the select list.
+ERROR: AnalysisException: STRUCT type inside collection types is not supported.
 
 -- Original column has several scalar and one complex column.
 DESCRIBE customer;
diff --git a/docs/topics/impala_expand_complex_types.xml b/docs/topics/impala_expand_complex_types.xml
new file mode 100644
index 000000000..1fa39eca9
--- /dev/null
+++ b/docs/topics/impala_expand_complex_types.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="expand_complex_types">
+
+ <title>EXPAND_COMPLEX_TYPES Query Option</title>
+ <titlealts audience="PDF"><navtitle>EXPAND_COMPLEX_TYPES</navtitle></titlealts>
+ <prolog>
+  <metadata>
+   <data name="Category" value="Impala"/>
+   <data name="Category" value="Impala Query Options"/>
+   <data name="Category" value="Troubleshooting"/>
+   <data name="Category" value="Querying"/>
+   <data name="Category" value="Developers"/>
+   <data name="Category" value="Data Analysts"/>
+  </metadata>
+ </prolog>
+
+ <conbody>
+  <p>
+   <indexterm audience="hidden">EXPAND_COMPLEX_TYPES Query Option</indexterm> Older versions of
+   Impala did not support complex types in the select list, and therefore a <codeph>SELECT
+   *</codeph> statement only expanded to primitive types even when the table contained complex-typed
+   columns. Support for complex types in the select list has since been added, but for backward
+   compatibility the behaviour that <q>*</q> expressions skip complex types has been kept as the
+   default. If the EXPAND_COMPLEX_TYPES query option is set to true, <q>*</q> expressions will
+   include complex types as well.
+  </p>
+
+  <p>
+   Note: even if EXPAND_COMPLEX_TYPES is false, complex-typed columns that are listed explicitly in
+   the select list (i.e. not as a <q>*</q> expression) will be included in the result.
+  </p>
+
+  <p><b>Type:</b> BOOLEAN</p>
+  <p><b>Default:</b> FALSE</p>
+  <p><b>Added in:</b> Impala 4.2</p>
+  <p conref="../shared/impala_common.xml#common/related_info"/>
+  <p>
+   <xref href="impala_complex_types.xml"/>
+  </p>
+ </conbody>
+</concept>
+