You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/08/08 23:37:23 UTC

[impala] 26/27: IMPALA-11434: Fix analysis of multiple more than 1d arrays in select list

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0766bc9c64fba0d883a2dc21e8327563190023c0
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu Jul 14 20:32:02 2022 +0200

    IMPALA-11434: Fix analysis of multiple more than 1d arrays in select list
    
    More than 1d arrays in select list tried to register a
    CollectionTableRef with name "item" for the inner arrays,
    leading to name collision if there was more than one such array.
    
    The logic is changed to always use the full path as implicit alias
    in CollectionTableRefs backing arrays in select list.
    
    As a side effect this leads to using the fully qualified names
    in expressions in the explain plans of queries that use arrays
    from views. This is not an intended change, but I don't consider
    it to be critical. Created IMPALA-11452 to deal with more
    sophisticated alias handling in collections.
    
    Testing:
    - added a new table to testdata and a regression test
    
    Change-Id: I6f2b6cad51fa25a6f6932420eccf1b0a964d5e4e
    Reviewed-on: http://gerrit.cloudera.org:8080/18734
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/analysis/Analyzer.java  |  5 ++---
 .../apache/impala/analysis/CollectionTableRef.java |  8 +++----
 .../org/apache/impala/analysis/FromClause.java     |  7 +++---
 .../org/apache/impala/analysis/StmtRewriter.java   |  9 +++++---
 .../main/java/org/apache/impala/catalog/Type.java  |  4 ++++
 .../apache/impala/analysis/AnalyzeStmtsTest.java   | 12 +++++------
 .../functional/functional_schema_template.sql      | 25 ++++++++++++++++++++++
 .../datasets/functional/schema_constraints.csv     |  2 ++
 .../queries/PlannerTest/zipping-unnest.test        |  8 +++----
 .../QueryTest/nested-array-in-select-list.test     | 11 +++++++++-
 10 files changed, 65 insertions(+), 26 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index d9dad77ae..c9d23db28 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -870,7 +870,7 @@ public class Analyzer {
           table instanceof FeDataSourceTable);
       return new BaseTableRef(tableRef, resolvedPath);
     } else {
-      return new CollectionTableRef(tableRef, resolvedPath);
+      return new CollectionTableRef(tableRef, resolvedPath, false);
     }
   }
 
@@ -1654,8 +1654,7 @@ public class Analyzer {
     collectionTableRawPath.addAll(rawPath);
 
     TableRef tblRef = new TableRef(collectionTableRawPath, null);
-    CollectionTableRef collTblRef = new CollectionTableRef(tblRef, slotPath);
-    collTblRef.setInSelectList(true);
+    CollectionTableRef collTblRef = new CollectionTableRef(tblRef, slotPath, true);
     collTblRef.analyze(this);
 
     Preconditions.checkState(collTblRef.getCollectionExpr() instanceof SlotRef);
diff --git a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
index efacd578b..521a4bb09 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
@@ -52,16 +52,15 @@ public class CollectionTableRef extends TableRef {
    * its resolved path. Sets table aliases and join-related attributes.
    * If inSelectList is true, then the collection won't be flattened during execution.
    */
-  public CollectionTableRef(TableRef tableRef, Path resolvedPath) {
+  public CollectionTableRef(TableRef tableRef, Path resolvedPath, boolean inSelectList) {
     super(tableRef);
     Preconditions.checkState(resolvedPath.isResolved());
     resolvedPath_ = resolvedPath;
+    inSelectList_ = inSelectList;
     // Use the last path element as an implicit alias if no explicit alias was given.
     if (hasExplicitAlias()) return;
-    boolean rootTableHasExplicitAlias = resolvedPath.getRootDesc() != null
-        && resolvedPath.getRootDesc().hasExplicitAlias();
     String implicitAlias = rawPath_.get(rawPath_.size() - 1).toLowerCase();
-    if (rootTableHasExplicitAlias) {
+    if (rawPath_.size() > 1) {
       // Use the full path from the root table alias to be able to distinguish
       // among collection columns with the same name.
       aliases_ =
@@ -204,7 +203,6 @@ public class CollectionTableRef extends TableRef {
 
   @Override
   public boolean isCollectionInSelectList() { return inSelectList_; }
-  public void setInSelectList(boolean value) { inSelectList_ = value; }
 
   @Override
   protected CollectionTableRef clone() { return new CollectionTableRef(this); }
diff --git a/fe/src/main/java/org/apache/impala/analysis/FromClause.java b/fe/src/main/java/org/apache/impala/analysis/FromClause.java
index dbe60aaf7..765980b90 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FromClause.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FromClause.java
@@ -176,9 +176,10 @@ public class FromClause extends StmtNode implements Iterable<TableRef> {
         // contrary to the intended semantics of reset(). We could address this issue by
         // changing the WITH-clause analysis to register local views that have
         // fully-qualified table refs, and then remove the full qualification here.
-        if (!(origTblRef instanceof CollectionTableRef
-            && ((CollectionTableRef)origTblRef).isRelative())) {
-          newTblRef.rawPath_ = origTblRef.getResolvedPath().getFullyQualifiedRawPath();
+        Path oldPath = origTblRef.getResolvedPath();
+        if (oldPath.getRootDesc() == null
+            || !oldPath.getRootDesc().getType().isCollectionStructType()) {
+          newTblRef.rawPath_ = oldPath.getFullyQualifiedRawPath();
         }
         set(i, newTblRef);
       }
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index ad060b209..29669a2d4 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -1828,9 +1828,12 @@ public class StmtRewriter {
       newCollPath.add(0, alias);
       // Remove the alias of the old collection ref from the analyzer. We need to add
       // the new collection ref with the same old alias.
-      analyzer.removeAlias(collTblRef.getUniqueAlias());
-      TableRef newCollTblRef =
-          TableRef.newTableRef(analyzer, newCollPath, collTblRef.getUniqueAlias());
+      String[] old_aliases = collTblRef.getAliases();
+      for (String old_alias: old_aliases) {
+        analyzer.removeAlias(old_alias);
+      }
+      TableRef newCollTblRef = TableRef.newTableRef(
+          analyzer, newCollPath, old_aliases[old_aliases.length - 1]);
       // Set JOIN attributes. Please note that we cannot use TableRef.setJoinAttrs()
       // because we only want to copy the ON clause and the plan hints. The col names
       // in USING have been already converted to an ON clause. We let the analyzer/
diff --git a/fe/src/main/java/org/apache/impala/catalog/Type.java b/fe/src/main/java/org/apache/impala/catalog/Type.java
index f69e45560..249eb9dbd 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Type.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Type.java
@@ -20,6 +20,7 @@ package org.apache.impala.catalog;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.impala.analysis.CollectionStructType;
 import org.apache.impala.analysis.CreateTableStmt;
 import org.apache.impala.analysis.Parser;
 import org.apache.impala.analysis.StatementBase;
@@ -234,6 +235,9 @@ public abstract class Type {
   public boolean isMapType() { return this instanceof MapType; }
   public boolean isArrayType() { return this instanceof ArrayType; }
   public boolean isStructType() { return this instanceof StructType; }
+  public boolean isCollectionStructType() {
+    return this instanceof CollectionStructType;
+  }
 
   /**
    * Returns true if Impala supports this type in the metdata. It does not mean we
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 2a585c371..be7cf7776 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -78,6 +78,9 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         collectionField, collectionTable), tbl);
     TblsAnalyzeOk(String.format("select c.%s from $TBL a, a.int_array_col b, a.%s c",
         collectionField, collectionTable), tbl);
+    TblsAnalyzeOk(String.format(
+        "select 1 from $TBL, allcomplextypes.%s, functional.allcomplextypes.%s",
+        collectionTable, collectionTable), tbl);
 
     // Test join types. Parent/collection joins do not require an ON or USING clause.
     for (JoinOperator joinOp: JoinOperator.values()) {
@@ -118,15 +121,9 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         collectionField, collectionTable, collectionTable), tbl,
         "Duplicate table alias: 'b'");
     // Duplicate implicit alias.
-    String[] childTblPath = collectionTable.split("\\.");
-    String childTblAlias = childTblPath[childTblPath.length - 1];
     TblsAnalysisError(String.format("select %s from $TBL a, a.%s, a.%s",
         collectionField, collectionTable, collectionTable), tbl,
         String.format("Duplicate table alias: '%s'", "a." + collectionTable));
-    TblsAnalysisError(String.format(
-        "select 1 from $TBL, allcomplextypes.%s, functional.allcomplextypes.%s",
-        collectionTable, collectionTable), tbl,
-        String.format("Duplicate table alias: '%s'", childTblAlias));
     // Duplicate implicit/explicit alias.
     TblsAnalysisError(String.format(
         "select %s from $TBL, functional.allcomplextypes.%s allcomplextypes",
@@ -413,7 +410,8 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         "TABLESAMPLE is only supported on HDFS tables: functional.alltypes_view");
     AnalysisError("select * from functional.allcomplextypes.int_array_col " +
         "tablesample system (10)",
-        "TABLESAMPLE is only supported on HDFS tables: int_array_col");
+        "TABLESAMPLE is only supported on HDFS tables: " +
+        "functional.allcomplextypes.int_array_col");
     AnalysisError("select * from functional.allcomplextypes a, a.int_array_col " +
         "tablesample system (10)",
         "TABLESAMPLE is only supported on HDFS tables: a.int_array_col");
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 996591da2..f1bef2a07 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3471,3 +3471,28 @@ ALTER MATERIALIZED VIEW {db_name}{db_suffix}.{table_name} REBUILD;
 -- do a count to confirm if the rebuild populated rows in the MV
 select count(*) as mv_count from {db_name}{db_suffix}.{table_name};
 ====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
+array_tbl
+---- COLUMNS
+id INT
+int_1d ARRAY<INT>
+int_2d ARRAY<ARRAY<INT>>
+int_3d ARRAY<ARRAY<ARRAY<INT>>>
+string_1d ARRAY<STRING>
+string_2d ARRAY<ARRAY<STRING>>
+string_3d ARRAY<ARRAY<ARRAY<STRING>>>
+---- DEPENDENT_LOAD_HIVE
+-- It would be nice to insert NULLs, but I couldn't find a way in Hive.
+INSERT INTO {db_name}{db_suffix}.{table_name} VALUES
+ (1,
+  array(1, 2, NULL),
+  array(array(1, 2, NULL), array(3)),
+  array(array(array(1, 2, NULL), array(3)), array(array(4))),
+  array("1", "2", NULL),
+  array(array("1", "2", NULL), array("3")),
+  array(array(array("1", "2", NULL), array("3")), array(array("4")))
+ )
+---- LOAD
+====
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index 6b26d3ccf..54caaeb5e 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -329,6 +329,8 @@ table_name:alltypessmall_bool_sorted, constraint:restrict_to, table_format:orc/d
 
 table_name:complextypes_arrays_only_view, constraint:restrict_to, table_format:parquet/none/none
 table_name:complextypes_arrays_only_view, constraint:restrict_to, table_format:orc/def/block
+table_name:array_tbl, constraint:restrict_to, table_format:parquet/none/none
+table_name:array_tbl, constraint:restrict_to, table_format:orc/def/block
 
 # 'alltypestiny_negative' only used in ORC tests.
 table_name:alltypestiny_negative, constraint:restrict_to, table_format:orc/def/block
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test b/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
index f4a39cab6..bfd32243c 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
@@ -73,7 +73,7 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional_parquet.complextypes_arrays]
    HDFS partitions=1/1 files=1 size=1.06KB
    predicates: !empty(arr1)
-   predicates on arr1: UNNEST(arr1) < 5, arr1.item < 5
+    predicates on functional_parquet.complextypes_arrays.arr1: UNNEST(functional_parquet.complextypes_arrays.arr1) < 5, functional_parquet.complextypes_arrays.arr1.item < 5
    row-size=16B cardinality=1.35K
 ====
 # Similar as above but using the FROM clause syntax for zipping unnest.
@@ -99,7 +99,7 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional_parquet.complextypes_arrays]
    HDFS partitions=1/1 files=1 size=1.06KB
    predicates: !empty(arr1)
-   predicates on arr1: arr1.item < 5
+   predicates on functional_parquet.complextypes_arrays.arr1: functional_parquet.complextypes_arrays.arr1.item < 5
    row-size=16B cardinality=1.35K
 ====
 # Check that for multiple unnests the predicate on the unnested items is pushed to the
@@ -172,7 +172,7 @@ PLAN-ROOT SINK
 |  |     row-size=16B cardinality=1
 |  |
 |  03:UNNEST [test_zipping_unnest_db.view_arrays.arr1, test_zipping_unnest_db.view_arrays.arr2]
-|     predicates: UNNEST(arr1) < 5
+|     predicates: UNNEST(functional_parquet.complextypes_arrays.arr1) < 5
 |     row-size=4B cardinality=10
 |
 00:SCAN HDFS [functional_parquet.complextypes_arrays]
@@ -197,7 +197,7 @@ PLAN-ROOT SINK
 |  |     row-size=16B cardinality=1
 |  |
 |  03:UNNEST [a.arr1, a.arr2]
-|     predicates: arr1.item < 5
+|     predicates: functional_parquet.complextypes_arrays.arr1.item < 5
 |     row-size=4B cardinality=10
 |
 00:SCAN HDFS [functional_parquet.complextypes_arrays]
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
index 1b2ef1e63..8904119d6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test
@@ -382,4 +382,13 @@ where a.unnested_arr1 < 5;
 7,2,'NULL'
 ---- TYPES
 INT,INT,STRING
-====
\ No newline at end of file
+====
+---- QUERY
+# Regression test for:
+#   IMPALA-11434: "More than 1 2d arrays in select list causes analysis error"
+select id, int_1d, int_2d, int_3d, string_1d, string_2d, string_3d from array_tbl;
+---- RESULTS
+1,'[1,2,NULL]','[[1,2,NULL],[3]]','[[[1,2,NULL],[3]],[[4]]]','["1","2",NULL]','[["1","2",NULL],["3"]]','[[["1","2",NULL],["3"]],[["4"]]]'
+---- TYPES
+INT,STRING,STRING,STRING,STRING,STRING,STRING
+=====