You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/12/11 18:47:12 UTC

(impala) 01/02: IMPALA-12001: Informative error message for complex types with DISTINCT

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4c28ab02c658b559c47cf9993448218e228d61c9
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Tue Dec 5 15:00:15 2023 +0100

    IMPALA-12001: Informative error message for complex types with DISTINCT
    
    Before this change, queries with SELECT DISTINCT on a complex type
    failed without a useful error message.
    
    With structs, we got an FE exception:
      use functional_parquet;
      select distinct(struct_val) from alltypes_structs;
    
      ERROR: IllegalStateException: null
    
    With collections, the BE hit a DCHECK and crashed:
      use functional_parquet;
      select distinct(arr1) from complextypes_arrays;
    
      Socket error 104: [Errno 104] Connection reset by peer
    
    Aggregate functions with complex DISTINCT parameters also failed without
    a clear error message. For example:
      select count(distinct struct_val) from alltypes_structs;
      select count(distinct arr1) from complextypes_arrays;
    
    To support DISTINCT for complex types we would need to implement
    equality and hashing for them. We are not planning to do that in the
    near future, so this change introduces informative error messages in
    these cases.
    
    Testing:
     - added test queries for SELECT DISTINCT and SELECT COUNT(DISTINCT ...)
       with arrays, maps and structs, expecting the correct error messages.
    
    Change-Id: Ibe2642d1683a10fd05a95e2ad8470d16f0d5242c
    Reviewed-on: http://gerrit.cloudera.org:8080/20752
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/analysis/MultiAggregateInfo.java | 16 ++++++++++++++++
 .../java/org/apache/impala/analysis/SelectStmt.java    |  6 ++++++
 .../queries/QueryTest/nested-array-in-select-list.test | 12 +++++++++++-
 .../queries/QueryTest/nested-map-in-select-list.test   | 18 ++++++++++++++----
 .../QueryTest/nested-struct-in-select-list.test        | 13 +++++++++++--
 5 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/MultiAggregateInfo.java b/fe/src/main/java/org/apache/impala/analysis/MultiAggregateInfo.java
index 48cb37f96..24c12fda9 100644
--- a/fe/src/main/java/org/apache/impala/analysis/MultiAggregateInfo.java
+++ b/fe/src/main/java/org/apache/impala/analysis/MultiAggregateInfo.java
@@ -249,6 +249,10 @@ public class MultiAggregateInfo {
         groupingBuiltinExprs.add(aggExpr);
       } else if (aggExpr.isDistinct()) {
         List<Expr> children = AggregateFunction.getCanonicalDistinctAggChildren(aggExpr);
+
+        // Complex types are not supported as DISTINCT parameters of aggregate functions.
+        checkComplexDistinctParams(aggExpr, children);
+
         int groupIdx = distinctExprs.indexOf(children);
         List<FunctionCallExpr> groupAggFns;
         if (groupIdx == -1) {
@@ -342,6 +346,18 @@ public class MultiAggregateInfo {
     }
   }
 
+  private static void checkComplexDistinctParams(FunctionCallExpr aggExpr,
+      List<Expr> params) throws AnalysisException {
+    for (Expr child : params) {
+      if (child.getType().isComplexType()) {
+        throw new AnalysisException("Complex types are not supported " +
+            "as DISTINCT parameters of aggregate functions. Distinct parameter: '" +
+            child.toSql() + "', type: '" + child.getType().toSql() +
+            "' in aggregate function '" + aggExpr.toSql() + "'.");
+      }
+    }
+  }
+
   /**
    * Implementation of analyze() for aggregation with grouping sets.
    * Does not handle distinct aggregate functions yet.
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 25a79c189..48c123759 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -562,6 +562,12 @@ public class SelectStmt extends QueryStmt {
       }
 
       for (Expr expr: resultExprs_) {
+        if (selectList_.isDistinct() && expr.getType().isComplexType()) {
+          throw new AnalysisException("Complex types are not supported " +
+              "in SELECT DISTINCT clauses. Expr: '" + expr.toSql() + "', type: '"
+              + expr.getType().toSql() + "'.");
+        }
+
         if (expr.getType().isArrayType()) {
           ArrayType arrayType = (ArrayType) expr.getType();
           if (!arrayType.getItemType().isSupported()) {
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
index 5c84778bd..4d4abc5ab 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
@@ -425,4 +425,14 @@ on a.id = b.id where a.id < 3;
 0,'NULL','NULL'
 ---- TYPES
 INT,STRING,STRING
-=====
+====
+---- QUERY
+select distinct arr1 from complextypes_arrays
+---- CATCH
+AnalysisException: Complex types are not supported in SELECT DISTINCT clauses.
+====
+---- QUERY
+select count(distinct arr1) from complextypes_arrays
+---- CATCH
+AnalysisException: Complex types are not supported as DISTINCT parameters of aggregate functions.
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
index 337dfbd55..aedebd289 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
@@ -392,7 +392,7 @@ select id, map_1d, map_2d, map_3d, arr_int_3d, map_map_array from collection_tbl
 3,'{645:"fourth even-toed ungulate",5:"fifth"}','{1:{10:"ten",20:"twentieth even-toed ungulate"},2:{30:"thirty even-toed ungulates",40:"forty"}}','{1:{10:{100:"hundred",200:"two hundred even-toed ungulates"},20:{300:"three hundred even-toed ungulates",400:"four hundred"}},2:{30:{500:"five hundred even-toed ungulates",600:"six hundred"},40:{700:"seven hundred even-toed ungulates",800:"eight hundred"}}}','[[[1,null,2,null],[null,15]],[[null,4]]]','{1:{10:[100,200],20:[300,400]},2:{30:[500, [...]
 ---- TYPES
 INT,STRING,STRING,STRING,STRING,STRING
-=====
+====
 ---- QUERY
 select id, map_1d, map_2d, mma.value mma_value, ma.value ma_value
 from collection_tbl c, c.map_map_array mma, mma.value ma;
@@ -411,7 +411,7 @@ from collection_tbl c, c.map_map_array mma, mma.value ma;
 3,'{645:"fourth even-toed ungulate",5:"fifth"}','{1:{10:"ten",20:"twentieth even-toed ungulate"},2:{30:"thirty even-toed ungulates",40:"forty"}}','{30:[500,600],40:[700,800]}','[700,800]'
 ---- TYPES
 INT,STRING,STRING,STRING,STRING
-=====
+====
 ---- QUERY
 -- Test that map keys are printed correctly.
 set CONVERT_LEGACY_HIVE_PARQUET_UTC_TIMESTAMPS=1;
@@ -435,7 +435,7 @@ from collection_tbl;
 '{true:"true even-toed ungulate",false:"false"}','{-1:"a nice even-toed ungulate",0:"best even-toed ungulate",1:"c"}','{-1:"a nice even-toed ungulate",0:"best even-toed ungulate",1:"c"}','{-1:"a nice even-toed ungulate",0:"best even-toed ungulate",1:"c"}','{-1.5:"a nice even-toed ungulate",0.25:"best even-toed ungulate",1.75:"c"}','{-1.5:"a nice even-toed ungulate",0.25:"best even-toed ungulate",1.75:"c"}','{-1.8:"a nice even-toed ungulate",0.2:"best even-toed ungulate",1.2:"c"}','{"one" [...]
 ---- TYPES
 STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING
-=====
+====
 ---- QUERY
 -- Test that map keys are printed correctly with STRINGIFY_MAP_KEYS=true.
 set CONVERT_LEGACY_HIVE_PARQUET_UTC_TIMESTAMPS=1;
@@ -460,4 +460,14 @@ from collection_tbl;
 '{"true":"true even-toed ungulate","false":"false"}','{"-1":"a nice even-toed ungulate","0":"best even-toed ungulate","1":"c"}','{"-1":"a nice even-toed ungulate","0":"best even-toed ungulate","1":"c"}','{"-1":"a nice even-toed ungulate","0":"best even-toed ungulate","1":"c"}','{"-1.5":"a nice even-toed ungulate","0.25":"best even-toed ungulate","1.75":"c"}','{"-1.5":"a nice even-toed ungulate","0.25":"best even-toed ungulate","1.75":"c"}','{"-1.8":"a nice even-toed ungulate","0.2":"best [...]
 ---- TYPES
 STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING
-=====
+====
+---- QUERY
+select distinct int_map from complextypestbl
+---- CATCH
+AnalysisException: Complex types are not supported in SELECT DISTINCT clauses.
+====
+---- QUERY
+select count(distinct int_map) from complextypestbl;
+---- CATCH
+AnalysisException: Complex types are not supported as DISTINCT parameters of aggregate functions.
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
index 846c78d2f..e00500632 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
@@ -452,8 +452,7 @@ from complextypes_nested_structs;
 INT,STRING
 ====
 ---- QUERY
-# Subquery that returns a complex type is not supported.
-# IMPALA-9500
+# Complex types in IN predicates are not supported.
 select outer_struct
 from complextypes_nested_structs
 where outer_struct in
@@ -462,3 +461,13 @@ where outer_struct in
 AnalysisException: A subquery can't return complex types. (SELECT outer_struct FROM functional_parquet.complex
 types_nested_structs)
 ====
+---- QUERY
+select distinct outer_struct from complextypes_nested_structs
+---- CATCH
+AnalysisException: Complex types are not supported in SELECT DISTINCT clauses.
+====
+---- QUERY
+select count(distinct outer_struct) from complextypes_nested_structs;
+---- CATCH
+AnalysisException: Complex types are not supported as DISTINCT parameters of aggregate functions.
+====