You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2024/01/31 00:23:05 UTC

(impala) 04/04: IMPALA-12763: Union with string struct crashes in ASAN

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 46f04313212952ae2e8f432cb622457918bae6cd
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Mon Jan 29 16:11:31 2024 +0100

    IMPALA-12763: Union with string struct crashes in ASAN
    
    In ASAN builds, if we UNION ALL an array containing a struct of a string
    with itself, Impala crashes. This is how to reproduce it:
    
    In Hive:
      create table su (arr ARRAY<STRUCT<s: STRING>>) stored as parquet;
      insert into su values (array(named_struct("s", "A")));
    
    In Impala:
      select 1, arr from su
        union all select 2, arr from su;
    
    The ASAN error message indicates a heap-use-after-free.
    
    Normally, UNIONs of structs are not supported yet (see IMPALA-10752),
    but a struct nested inside an array is currently allowed. This was
    probably not intentional and it leads to the above error, so this change
    disables structs in unions completely, including embedded structs.
    
    Testing:
     - adjusted existing tests
     - added a query that tests that types with embedded structs are not
       allowed in a UNION statement, in mixed-collections-and-structs.test
    
    Change-Id: Id728f1254b74636be594a33313a478b0b77c7ae4
    Reviewed-on: http://gerrit.cloudera.org:8080/20970
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/analysis/Analyzer.java  |  8 +++++---
 .../main/java/org/apache/impala/catalog/Type.java  | 22 ++++++++++++++++++++++
 .../apache/impala/analysis/AnalyzeStmtsTest.java   | 10 ++++++++--
 .../QueryTest/mixed-collections-and-structs.test   |  6 ++++++
 .../QueryTest/nested-array-in-select-list.test     |  2 +-
 .../QueryTest/nested-map-in-select-list.test       |  2 +-
 .../queries/QueryTest/struct-in-select-list.test   |  2 +-
 7 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index f6ce7b2bd..600cc9a8f 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -48,6 +48,7 @@ import org.apache.impala.authorization.PrivilegeRequest;
 import org.apache.impala.authorization.PrivilegeRequestBuilder;
 import org.apache.impala.authorization.TableMask;
 import org.apache.impala.authorization.User;
+import org.apache.impala.catalog.ArrayType;
 import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.DatabaseNotFoundException;
 import org.apache.impala.catalog.FeCatalog;
@@ -62,6 +63,7 @@ import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.FeView;
 import org.apache.impala.catalog.IcebergTimeTravelTable;
 import org.apache.impala.catalog.KuduTable;
+import org.apache.impala.catalog.MapType;
 import org.apache.impala.catalog.MaterializedViewHdfsTable;
 import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.catalog.StructField;
@@ -3400,10 +3402,10 @@ public class Analyzer {
       // Initialize with type of i-th expr in first list.
       Type compatibleType = firstList.get(i).getType();
       if (firstList.get(i) instanceof SlotRef &&
-          compatibleType.isStructType()) {
+          compatibleType.containsStruct()) {
         throw new AnalysisException(String.format(
-            "Set operations don't support STRUCT type. %s in %s", compatibleType.toSql(),
-            firstList.get(i).toSql()));
+            "Set operations don't support STRUCT types or types containing " +
+            "STRUCT types. %s in %s.", compatibleType.toSql(), firstList.get(i).toSql()));
       }
       widestExprs.add(firstList.get(i));
 
diff --git a/fe/src/main/java/org/apache/impala/catalog/Type.java b/fe/src/main/java/org/apache/impala/catalog/Type.java
index f0246d2f9..98c051da8 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Type.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Type.java
@@ -243,6 +243,28 @@ public abstract class Type {
     return this instanceof CollectionStructType;
   }
 
+  /**
+   * Returns true if this type is a struct type or contains one at any nesting depth,
+   * e.g. ARRAY<STRUCT<i: INT>>. For arrays the item type is checked; for maps both
+   * the key type and the value type are checked. Any other type yields false.
+   * Struct fields are never inspected: a struct returns true immediately.
+   */
+  public boolean containsStruct() {
+    if (isStructType()) return true;
+
+    if (isArrayType()) {
+      ArrayType arrayType = (ArrayType) this;
+      return arrayType.getItemType().containsStruct();
+    } else if (isMapType()) {
+      MapType mapType = (MapType) this;
+      return mapType.getKeyType().containsStruct() ||
+          mapType.getValueType().containsStruct();
+    }
+
+    return false;
+  }
+
+
   /**
    * Returns true if this type
    *  - is a collection type or
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index fd0dbc043..6da0a5b45 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -1016,7 +1016,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
     // Empty star expansion, but non empty result exprs.
     AnalyzesOk("select 1, * from only_complex_types");
 
-    // Struct in select list works only if codegen is OFF.
+    // Struct in select list.
     AnalysisContext ctx = createAnalysisCtx();
     AnalyzesOk("select alltypes from functional_orc_def.complextypes_structs", ctx);
     AnalyzesOk("select int_array_col from functional.allcomplextypes");
@@ -1030,7 +1030,13 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         "collection 'int_array_col' of type 'ARRAY<INT>'");
     AnalysisError("select tiny_struct from functional_orc_def.complextypes_structs " +
         "union all select tiny_struct from functional_orc_def.complextypes_structs", ctx,
-        "Set operations don't support STRUCT type. STRUCT<b:BOOLEAN> in tiny_struct");
+        "Set operations don't support STRUCT types or types containing STRUCT types." +
+        " STRUCT<b:BOOLEAN> in tiny_struct");
+    AnalysisError("select all_mix from functional_parquet.collection_struct_mix " +
+        "union all select all_mix from functional_parquet.collection_struct_mix", ctx,
+        "Set operations don't support STRUCT types or types containing STRUCT types. " +
+        "MAP<INT,STRUCT<big:STRUCT<arr:ARRAY<STRUCT<inner_arr:ARRAY<ARRAY<INT>>," +
+        "m:TIMESTAMP>>,n:INT>,small:STRUCT<str:STRING,i:INT>>> in all_mix.");
     AnalyzesOk("select 1 from " +
         "(select int_array_col from functional.allcomplextypes) v");
     AnalyzesOk("select int_array_col from " +
diff --git a/testdata/workloads/functional-query/queries/QueryTest/mixed-collections-and-structs.test b/testdata/workloads/functional-query/queries/QueryTest/mixed-collections-and-structs.test
index 56175127a..732aac87c 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/mixed-collections-and-structs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/mixed-collections-and-structs.test
@@ -549,3 +549,9 @@ from collection_struct_mix, collection_struct_mix.arr_contains_nested_struct arr
 ---- TYPES
 INT,STRING,SMALLINT
 ====
+---- QUERY
+# Unions of types containing structs are not allowed.
+select all_mix from collection_struct_mix union all select all_mix from collection_struct_mix
+---- CATCH
+AnalysisException: Set operations don't support STRUCT types or types containing STRUCT types. MAP<INT,STRUCT<big:STRUCT<arr:ARRAY<STRUCT<inner_arr:ARRAY<ARRAY<INT>>,m:TIMESTAMP>>,n:INT>,small:STRUCT<str:STRING,i:INT>>> in all_mix.
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
index 4d4abc5ab..13dae41ea 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
@@ -133,7 +133,7 @@ tinyint,string,string
 select 1, struct_contains_arr, struct_contains_nested_arr, all_mix from collection_struct_mix
   union all select 2, struct_contains_arr, struct_contains_nested_arr, all_mix from collection_struct_mix
 ---- CATCH
-AnalysisException: Set operations don't support STRUCT type. STRUCT<arr:ARRAY<INT>> in struct_contains_arr
+AnalysisException: Set operations don't support STRUCT types or types containing STRUCT types. STRUCT<arr:ARRAY<INT>> in struct_contains_arr.
 ====
 ---- QUERY
 select 1 from (select int_array from complextypestbl) s
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
index aedebd289..e21747c08 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
@@ -193,7 +193,7 @@ tinyint,string,string
 select 1, struct_contains_map, all_mix from collection_struct_mix
   union all select 2, struct_contains_map, all_mix from collection_struct_mix
 ---- CATCH
-AnalysisException: Set operations don't support STRUCT type. STRUCT<m:MAP<INT,STRING>> in struct_contains_map
+AnalysisException: Set operations don't support STRUCT types or types containing STRUCT types. STRUCT<m:MAP<INT,STRING>> in struct_contains_map.
 ====
 ---- QUERY
 select 1 from (select int_map from complextypestbl) s
diff --git a/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test
index 082892649..546d1c527 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/struct-in-select-list.test
@@ -603,7 +603,7 @@ select id, tiny_struct from complextypes_structs
 union all
 select id, tiny_struct from complextypes_structs;
 ---- CATCH
-AnalysisException: Set operations don't support STRUCT type. STRUCT<b:BOOLEAN> in tiny_struct
+AnalysisException: Set operations don't support STRUCT types or types containing STRUCT types. STRUCT<b:BOOLEAN> in tiny_struct.
 ====
 ---- QUERY
 # Ordering by struct column is not supported.