You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/02/01 09:32:25 UTC

[impala] 01/02: IMPALA-11845: Fix incorrect check of struct STAR path in resolvePathWithMasking

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0c1bd9eff37b9a2df2849e6bc8edc8035bb8aefe
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Tue Jan 17 16:27:44 2023 +0800

    IMPALA-11845: Fix incorrect check of struct STAR path in resolvePathWithMasking
    
    resolvePathWithMasking() is a wrapper on resolvePath() to further
    resolve nested columns inside the table masking view. When it was
    added, complex types in the select list hadn't been supported yet. So
    the table masking view can't expose complex type columns directly in the
    select list. Any paths in nested types will be further resolved inside
    the table masking view in resolvePathWithMasking().
    
    Take the following query as an example:
      select id, nested_struct.* from complextypestbl;
    If Ranger column-masking/row-filter policies applied on the table, the
    query is rewritten as
      select id, nested_struct.* from (
        select mask(id) from complextypestbl
        where row-filtering-condition
      ) t;
    Table masking view "t" can't expose the nested column "nested_struct".
    So we further resolve "nested_struct" inside the inlineView to use the
    masked table "complextypestbl". The underlying TableRef is expected to
    be a BaseTableRef.
    
    Paths that don't reference nested columns should be resolved and
    returned directly (just like the original resolvePath() does). E.g.
      select v.* from masked_view v
    is rewritten to
      select v.* from (
        select mask(c1), mask(c2), ..., mask(cn)
        from masked_view
        where row-filtering-condition
      ) v;
    
    The STAR path "v.*" should be resolved directly. However, it's treated
    as a nested column unexpectedly. The code then tries to resolve it
    inside the table "masked_view" and found "masked_view" is not a table so
    throws the IllegalStateException.
    
    These are the current conditions for identifying nested STAR paths:
     - The destType is STRUCT
     - And the resolved path is rooted at a valid tuple descriptor
    
    They don't really recognize the nested struct columns because STAR paths
    on table/view also match these conditions. When the STAR path is an
    expansion on a catalog table/view, the root tuple descriptor is
    exactly the output tuple of the table/view. The destType is the type of
    the tuple descriptor which is always a StructType.
    
    Note that STAR paths on other nested types, i.e. array/map, are invalid.
    So the first condition matches for all valid cases. The second condition
    also matches all valid cases since both the table/view and struct STAR
    expansion have the path rooted at a valid tuple descriptor.
    
    This patch fixes the check for nested struct STAR path by checking
    the matched types instead. Note that if "v.*" is a table/view expansion,
    the matched type list is empty. If "v.*" is a struct column expansion,
    the matched type list contains the STRUCT column type.
    
    Tests:
     - Add missing coverage on STAR paths (v.*) on masked views.
    
    Change-Id: I8f1e78e325baafbe23101909d47e82bf140a2d77
    Reviewed-on: http://gerrit.cloudera.org:8080/19429
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/analysis/Analyzer.java  |   8 +-
 .../main/java/org/apache/impala/analysis/Path.java |   5 +-
 .../queries/QueryTest/ranger_column_masking.test   |  46 +++++++++
 .../ranger_column_masking_complex_types.test       | 112 ++++++++++++++++++++-
 4 files changed, 166 insertions(+), 5 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 44d4dcb4c..c2e9c174a 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1190,9 +1190,11 @@ public class Analyzer {
         return resolvedPath;
       }
     } else if (pathType == PathType.STAR) {
-      if (!resolvedPath.destType().isStructType() || !resolvedPath.isRootedAtTuple()) {
-        return resolvedPath;
-      }
+      // For "v.*", return directly if "v" is a table/view.
+      // If it's a struct column, we need to further resolve it if it's now resolved on a
+      // table masking view.
+      if (resolvedPath.getMatchedTypes().isEmpty()) return resolvedPath;
+      Preconditions.checkState(resolvedPath.destType().isStructType());
     }
     // In this case, resolvedPath is resolved on a nested column. Check if it's resolved
     // on a table masking view. The root TableRef(table/view) could be at a parent query
diff --git a/fe/src/main/java/org/apache/impala/analysis/Path.java b/fe/src/main/java/org/apache/impala/analysis/Path.java
index 2dfe01032..044a42be4 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Path.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Path.java
@@ -133,6 +133,7 @@ public class Path {
 
   // Registered table alias that this path is rooted at, if any.
   // Null if the path is rooted at a catalog table/view.
+  // Note that this is also set for STAR path of "v.*" when "v" is a catalog table/view.
   private final TupleDescriptor rootDesc_;
 
   // Catalog table that this resolved path is rooted at, if any.
@@ -144,7 +145,9 @@ public class Path {
   private final Path rootPath_;
 
   // List of matched types and field positions set during resolution. The matched
-  // types/positions describe the physical path through the schema tree.
+  // types/positions describe the physical path through the schema tree of a catalog
+  // table/view. Empty if the path corresponds to a catalog table/view, e.g. when it's a
+  // TABLE_REF or when it's a STAR on a table/view.
   private final List<Type> matchedTypes_ = new ArrayList<>();
   private final List<Integer> matchedPositions_ = new ArrayList<>();
 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test
index d23961010..9f76fc778 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking.test
@@ -446,6 +446,52 @@ order by id limit 10
 STRING,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT
 ====
 ---- QUERY
+# Test on star select with table ref
+select v.* from functional.alltypes_view v
+order by id limit 10
+---- RESULTS
+0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1
+100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1
+200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1
+300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1
+400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1
+500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1
+600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1
+700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1
+800,true,8,8,8,80,8.80000019073,80.8,'01/01/09','vvv8ttt',2009-01-01 00:08:00.280000000,2009,1
+900,false,9,9,9,90,9.89999961853,90.9,'01/01/09','vvv9ttt',2009-01-01 00:09:00.360000000,2009,1
+---- TYPES
+INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT
+====
+---- QUERY
+select v.* from functional.alltypes_view v, functional.alltypestiny t where v.id = t.id
+---- RESULTS
+0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1
+100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1
+200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1
+300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1
+400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1
+500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1
+600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1
+700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1
+---- TYPES
+INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT
+====
+---- QUERY
+select v.id, v.*, t.id from functional.alltypes_view v, functional.alltypestiny t where v.id = t.id
+---- RESULTS
+0,0,true,0,0,0,0,0.0,0.0,'01/01/09','vvv0ttt',2009-01-01 00:00:00,2009,1,0
+100,100,false,1,1,1,10,1.10000002384,10.1,'01/01/09','vvv1ttt',2009-01-01 00:01:00,2009,1,100
+200,200,true,2,2,2,20,2.20000004768,20.2,'01/01/09','vvv2ttt',2009-01-01 00:02:00.100000000,2009,1,200
+300,300,false,3,3,3,30,3.29999995232,30.3,'01/01/09','vvv3ttt',2009-01-01 00:03:00.300000000,2009,1,300
+400,400,true,4,4,4,40,4.40000009537,40.4,'01/01/09','vvv4ttt',2009-01-01 00:04:00.600000000,2009,1,400
+500,500,false,5,5,5,50,5.5,50.5,'01/01/09','vvv5ttt',2009-01-01 00:05:00.100000000,2009,1,500
+600,600,true,6,6,6,60,6.59999990463,60.6,'01/01/09','vvv6ttt',2009-01-01 00:06:00.150000000,2009,1,600
+700,700,false,7,7,7,70,7.69999980927,70.7,'01/01/09','vvv7ttt',2009-01-01 00:07:00.210000000,2009,1,700
+---- TYPES
+INT,INT,BOOLEAN,TINYINT,SMALLINT,INT,BIGINT,FLOAT,DOUBLE,STRING,STRING,TIMESTAMP,INT,INT,INT
+====
+---- QUERY
 # Test on local view (CTE). Correctly ignore masking on local view names so the result
 # won't be 100 (affected by policy id => id * 100).
 use functional;
diff --git a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test
index 316f573b4..563df801b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/ranger_column_masking_complex_types.test
@@ -45,7 +45,7 @@ select id, nested_struct.a from complextypestbl
 BIGINT,INT
 ====
 ---- QUERY
-# Test resolving nested columns in expending star expression.
+# Test resolving nested columns in expanding star expression.
 select id, nested_struct.* from complextypestbl
 ---- RESULTS
 100,1
@@ -60,6 +60,116 @@ select id, nested_struct.* from complextypestbl
 BIGINT,INT
 ====
 ---- QUERY
+# Test resolving nested columns in expanding star expression.
+select nested_struct.* from complextypestbl
+---- RESULTS
+1
+NULL
+NULL
+NULL
+NULL
+NULL
+7
+-1
+---- TYPES
+INT
+====
+---- QUERY
+# Test resolving nested columns in expanding star expression.
+set EXPAND_COMPLEX_TYPES=1;
+select nested_struct.* from complextypestbl
+---- CATCH
+AnalysisException: Struct containing a collection type is not allowed in the select list.
+====
+---- QUERY
+# Test resolving explicit STAR path on a nested struct column inside array
+select id, nested_arr.item.*
+from functional_parquet.complextypestbl t,
+  t.nested_struct.c.d arr,
+  arr.item nested_arr;
+---- RESULTS
+100,10,'aaa'
+100,-10,'bbb'
+100,11,'c'
+200,NULL,'NULL'
+200,10,'aaa'
+200,NULL,'NULL'
+200,-10,'bbb'
+200,NULL,'NULL'
+200,11,'c'
+200,NULL,'NULL'
+700,NULL,'NULL'
+800,-1,'nonnullable'
+---- TYPES
+BIGINT,INT,STRING
+====
+---- QUERY
+# Test resolving explicit STAR path on a nested struct column inside array
+select nested_arr.item.*
+from functional_parquet.complextypestbl t,
+  t.nested_struct.c.d arr,
+  arr.item nested_arr;
+---- RESULTS
+10,'aaa'
+-10,'bbb'
+11,'c'
+NULL,'NULL'
+10,'aaa'
+NULL,'NULL'
+-10,'bbb'
+NULL,'NULL'
+11,'c'
+NULL,'NULL'
+NULL,'NULL'
+-1,'nonnullable'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# Test resolving implicit STAR path on a nested struct column inside array
+select id, nested_arr.*
+from functional_parquet.complextypestbl t,
+  t.nested_struct.c.d arr,
+  arr.item nested_arr;
+---- RESULTS
+100,10,'aaa'
+100,-10,'bbb'
+100,11,'c'
+200,NULL,'NULL'
+200,10,'aaa'
+200,NULL,'NULL'
+200,-10,'bbb'
+200,NULL,'NULL'
+200,11,'c'
+200,NULL,'NULL'
+700,NULL,'NULL'
+800,-1,'nonnullable'
+---- TYPES
+BIGINT,INT,STRING
+====
+---- QUERY
+# Test resolving explicit STAR path on a nested struct column inside array
+select nested_arr.*
+from functional_parquet.complextypestbl t,
+  t.nested_struct.c.d arr,
+  arr.item nested_arr;
+---- RESULTS
+10,'aaa'
+-10,'bbb'
+11,'c'
+NULL,'NULL'
+10,'aaa'
+NULL,'NULL'
+-10,'bbb'
+NULL,'NULL'
+11,'c'
+NULL,'NULL'
+NULL,'NULL'
+-1,'nonnullable'
+---- TYPES
+INT,STRING
+====
+---- QUERY
 # Test resolving nested column in function.
 select count(id), count(nested_struct.a) from complextypestbl
 ---- RESULTS