You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/07/31 06:26:58 UTC

[impala] branch master updated: IMPALA-12319: Fix NullPointerException in UnnestExpr.isBoundByTupleIds()

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new e38bc96b4 IMPALA-12319: Fix NullPointerException in UnnestExpr.isBoundByTupleIds()
e38bc96b4 is described below

commit e38bc96b49d05a15464d174336ea6b21ffb04f5e
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Thu Jul 27 17:56:34 2023 +0800

    IMPALA-12319: Fix NullPointerException in UnnestExpr.isBoundByTupleIds()
    
    UnnestExpr can be used in a sort tuple, which won't have a resolved path.
    In such a case, there is no parent tuple id, so we should not check it in
    UnnestExpr.isBoundByTupleIds(). This fixes the NullPointerException.
    
    Tests
     - Added fe and e2e tests for sorting on columns that come from unnest().
     - Moved test_zipping_unnest_from_view to a dedicated class that only has
       the parquet file_format dimension to make sure it also runs in the
       "core" exploration strategy. Previously it only ran in the
       "exhaustive" strategy.
    
    Change-Id: I43e1ef0467edfb8a4a23047a439426053c93ad72
    Reviewed-on: http://gerrit.cloudera.org:8080/20274
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/analysis/UnnestExpr.java     |  11 +-
 .../queries/PlannerTest/zipping-unnest.test        | 136 ++++++++++++++++++++-
 .../QueryTest/zipping-unnest-from-view.test        | 102 ++++++++++++++++
 tests/query_test/test_nested_types.py              |  15 ++-
 4 files changed, 257 insertions(+), 7 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java b/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
index 932a4fce6..7e3fb5351 100644
--- a/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
@@ -242,10 +242,13 @@ public class UnnestExpr extends SlotRef {
 
   @Override
   public boolean isBoundByTupleIds(List<TupleId> tids) {
-    Preconditions.checkState(desc_ != null);
-    TupleId parentId = desc_.getParent().getRootDesc().getId();
-    for (TupleId tid: tids) {
-      if (tid.equals(parentId)) return true;
+    Preconditions.checkState(desc_ != null, "Null desc_ in UnnestExpr");
+    Preconditions.checkState(desc_.getParent() != null, "Null parent in UnnestExpr");
+    if (desc_.getParent().getRootDesc() != null) {
+      TupleId parentId = desc_.getParent().getRootDesc().getId();
+      for (TupleId tid : tids) {
+        if (tid.equals(parentId)) return true;
+      }
     }
     return super.isBoundByTupleIds(tids);
   }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test b/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
index bfd32243c..f7af6d409 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
@@ -283,4 +283,138 @@ PLAN-ROOT SINK
    HDFS partitions=1/1 files=1 size=1.06KB
    predicates: cta.id % 2 = 1
    row-size=28B cardinality=135
-====
\ No newline at end of file
+====
+select id, item, row_number() over (order by id, item) rn
+from (
+  select id, unnest(arr1) item from functional_parquet.complextypes_arrays
+) v
+---- PLAN
+PLAN-ROOT SINK
+|
+06:ANALYTIC
+|  functions: row_number()
+|  order by: id ASC, UNNEST() ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=32B cardinality=13.51K
+|
+05:SORT
+|  order by: id ASC, UNNEST() ASC
+|  row-size=24B cardinality=13.51K
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [functional_parquet.complextypes_arrays.arr1 arr1]
+|     row-size=0B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   predicates: !empty(functional_parquet.complextypes_arrays.arr1)
+   row-size=16B cardinality=1.35K
+====
+select id, unnest(arr1), row_number() over (order by id, unnest(arr1))
+from functional_parquet.complextypes_arrays
+---- PLAN
+PLAN-ROOT SINK
+|
+06:ANALYTIC
+|  functions: row_number()
+|  order by: id ASC, UNNEST() ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=32B cardinality=13.51K
+|
+05:SORT
+|  order by: id ASC, UNNEST() ASC
+|  row-size=24B cardinality=13.51K
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [functional_parquet.complextypes_arrays.arr1 arr1]
+|     row-size=0B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   predicates: !empty(functional_parquet.complextypes_arrays.arr1)
+   row-size=16B cardinality=1.35K
+====
+select id, item1, item2, row_number() over (order by id, item1, item2)
+from (
+  select id, unnest(arr1) as item1, unnest(arr2) as item2
+  from functional_parquet.complextypes_arrays
+) v
+---- PLAN
+PLAN-ROOT SINK
+|
+06:ANALYTIC
+|  functions: row_number()
+|  order by: id ASC, UNNEST() ASC, UNNEST() ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=56B cardinality=13.51K
+|
+05:SORT
+|  order by: id ASC, UNNEST() ASC, UNNEST() ASC
+|  row-size=48B cardinality=13.51K
+|
+01:SUBPLAN
+|  row-size=44B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=44B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=28B cardinality=1
+|  |
+|  03:UNNEST [functional_parquet.complextypes_arrays.arr1 arr1, functional_parquet.complextypes_arrays.arr2 arr2]
+|     row-size=0B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   row-size=28B cardinality=1.35K
+====
+select id, item, row_number() over (order by id, item)
+from (
+  select id, unnest(arr1) + length(unnest(arr2)) as item
+  from functional_parquet.complextypes_arrays
+) v
+---- PLAN
+PLAN-ROOT SINK
+|
+06:ANALYTIC
+|  functions: row_number()
+|  order by: id ASC, UNNEST(arr1) + length(UNNEST(arr2)) ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=48B cardinality=13.51K
+|
+05:SORT
+|  order by: id ASC, UNNEST(arr1) + length(UNNEST(arr2)) ASC
+|  row-size=40B cardinality=13.51K
+|
+01:SUBPLAN
+|  row-size=44B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=44B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=28B cardinality=1
+|  |
+|  03:UNNEST [functional_parquet.complextypes_arrays.arr1 arr1, functional_parquet.complextypes_arrays.arr2 arr2]
+|     row-size=0B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   row-size=28B cardinality=1.35K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
index 0456db078..38c9a2cc5 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
@@ -481,3 +481,105 @@ where arr1_unnested > 7;
 5,12,'twelve'
 ---- TYPES
 INT,INT,STRING
+====
+---- QUERY
+# Regression test for IMPALA-12319. Test sorting on unnest results.
+select id, item, row_number() over (order by id, item)
+from (
+  select id, unnest(arr1) item from functional_parquet.complextypes_arrays
+) v;
+---- RESULTS
+1,1,1
+1,2,2
+1,3,3
+1,4,4
+1,5,5
+2,1,6
+2,3,7
+2,4,8
+2,5,9
+2,NULL,10
+3,8,11
+3,9,12
+3,10,13
+4,10,14
+5,10,15
+5,12,16
+5,NULL,17
+7,1,18
+7,2,19
+10,1,20
+10,2,21
+10,3,22
+---- TYPES
+INT,INT,BIGINT
+====
+---- QUERY
+# Same as the above test without using the inline view.
+select id, unnest(arr1), row_number() over (order by id, unnest(arr1))
+from functional_parquet.complextypes_arrays
+---- RESULTS
+1,1,1
+1,2,2
+1,3,3
+1,4,4
+1,5,5
+2,1,6
+2,3,7
+2,4,8
+2,5,9
+2,NULL,10
+3,8,11
+3,9,12
+3,10,13
+4,10,14
+5,10,15
+5,12,16
+5,NULL,17
+7,1,18
+7,2,19
+10,1,20
+10,2,21
+10,3,22
+---- TYPES
+INT,INT,BIGINT
+====
+---- QUERY
+select id, item1, item2, row_number() over (order by id, item1, item2)
+from (
+  select id, unnest(arr1) as item1, unnest(arr2) as item2
+  from functional_parquet.complextypes_arrays
+) v;
+---- RESULTS
+1,1,'one',1
+1,2,'two',2
+1,3,'three',3
+1,4,'four',4
+1,5,'five',5
+2,1,'one',6
+2,3,'three',7
+2,4,'NULL',8
+2,5,'five',9
+2,NULL,'two',10
+3,8,'NULL',11
+3,9,'NULL',12
+3,10,'ten',13
+4,10,'ten',14
+4,NULL,'eight',15
+4,NULL,'nine',16
+5,10,'ten',17
+5,12,'twelve',18
+5,NULL,'eleven',19
+5,NULL,'thirteen',20
+6,NULL,'str1',21
+6,NULL,'str2',22
+7,1,'NULL',23
+7,2,'NULL',24
+9,NULL,'str1',25
+9,NULL,'str2',26
+10,1,'NULL',27
+10,2,'NULL',28
+10,3,'NULL',29
+---- TYPES
+INT,INT,STRING,BIGINT
+====
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index ce3d060ff..b9cc7bd4f 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -270,13 +270,24 @@ class TestZippingUnnest(ImpalaTestSuite):
     """
     self.run_test_case('QueryTest/zipping-unnest-in-select-list', vector)
 
+
+class TestZippingUnnestFromView(ImpalaTestSuite):
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestZippingUnnestFromView, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format == 'parquet')
+
   def test_zipping_unnest_from_view(self, vector, unique_database):
     """Zipping unnest queries where views are involved."""
-    if vector.get_value('table_format').file_format == 'orc':
-      pytest.skip('No need to run this test for multiple file formats.')
     self.run_test_case('QueryTest/zipping-unnest-from-view', vector,
         use_db=unique_database)
 
+
 class TestNestedTypesNoMtDop(ImpalaTestSuite):
   """Functional tests for nested types that do not need to be run with mt_dop > 0."""
   @classmethod