You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/01/11 00:02:22 UTC

[impala] 02/02: IMPALA-11030: Fix incorrect creation of common partition exprs

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6894085e4e280d728957e8b9fe175be23bfc6ec0
Author: Aman Sinha <am...@cloudera.com>
AuthorDate: Sun Dec 5 23:51:55 2021 -0800

    IMPALA-11030: Fix incorrect creation of common partition exprs
    
    When there are 2 or more analytic functions in an inline view
    and at least one of them does not have a partition-by expr,
    we were previously still populating the commonPartitionExprs
    list in AnalyticInfo. This common partition expr was then
    used during the auxiliary predicate creation when the outer
    query has a predicate on partition-by column. This leads to
    wrong result because the auxiliary predicate is pushed down
    to the table scan. While pushing down predicate on a
    partitioning column is okay if all the analytic functions
    contain that partitioning column, it is not correct to do
    this when at least one analytic function does not have that
    partitioning column.
    
    This patch fixes the wrong result by ensuring that the
    AnalyticInfo's commonPartitionExprs is empty if at least
    one analytic function does not have partitioning exprs.
    
    Testing:
     - Added new planner test and e2e test for row_num
       analytic function
    
    Change-Id: Iebb51f691e8e5459ffbaf5a49907140f2de212cc
    Reviewed-on: http://gerrit.cloudera.org:8080/18072
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Aman Sinha <am...@cloudera.com>
---
 .../org/apache/impala/analysis/AnalyticInfo.java   |  6 +++-
 .../queries/PlannerTest/analytic-fns.test          | 39 ++++++++++++++++++++++
 .../queries/QueryTest/analytic-fns.test            | 24 +++++++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalyticInfo.java b/fe/src/main/java/org/apache/impala/analysis/AnalyticInfo.java
index 0d8e3bc..585cddd 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AnalyticInfo.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AnalyticInfo.java
@@ -113,7 +113,11 @@ public class AnalyticInfo extends AggregateInfoBase {
     for (Expr analyticExpr: analyticExprs_) {
       Preconditions.checkState(analyticExpr.isAnalyzed());
       List<Expr> partitionExprs = ((AnalyticExpr) analyticExpr).getPartitionExprs();
-      if (partitionExprs == null) continue;
+      if (partitionExprs == null || partitionExprs.size() == 0) {
+        // if any of the partition by list is empty, the intersection set is empty
+        result.clear();
+        break;
+      }
       if (result.isEmpty()) {
         result.addAll(partitionExprs);
       } else {
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index a01b401..f15e32e 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -3496,3 +3496,42 @@ PLAN-ROOT SINK
    runtime filters: RF000 -> l_partkey
    row-size=20B cardinality=235.34K
 ====
+# IMPALA-11030: 2 Analytic functions within inline view,
+# only one of which has partitioning expr. Predicate in
+# outer query on partitioning expr
+select * from
+ (select id, string_col, row_number() over (order by id) num_rank,
+  row_number() over (partition by string_col order by id) prime_rank
+ from functional.alltypessmall) v where string_col = '0';
+---- PLAN
+PLAN-ROOT SINK
+|
+05:SELECT
+|  predicates: string_col = '0'
+|  row-size=33B cardinality=10
+|
+04:ANALYTIC
+|  functions: row_number()
+|  order by: id ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=33B cardinality=100
+|
+03:SORT
+|  order by: id ASC
+|  row-size=25B cardinality=100
+|
+02:ANALYTIC
+|  functions: row_number()
+|  partition by: string_col
+|  order by: id ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=25B cardinality=100
+|
+01:SORT
+|  order by: string_col ASC NULLS LAST, id ASC
+|  row-size=17B cardinality=100
+|
+00:SCAN HDFS [functional.alltypessmall]
+   HDFS partitions=4/4 files=4 size=6.32KB
+   row-size=17B cardinality=100
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
index fc1dce4..1d2fa53 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
@@ -2206,3 +2206,27 @@ BIGINT,BIGINT
 7,8
 8,8
 ====
+---- QUERY
+# IMPALA-11030: 2 Analytic functions within inline view,
+# only one of which has partitioning expr. Predicate in
+# outer query on partitioning expr
+select * from
+ (select id, string_col, row_number() over (order by id) num_rank,
+  row_number() over (partition by string_col order by id) prime_rank
+ from alltypessmall) v where string_col = '0'
+---- TYPES
+INT, STRING, BIGINT, BIGINT
+---- RESULTS
+0,'0',1,1
+10,'0',11,2
+20,'0',21,3
+25,'0',26,4
+35,'0',36,5
+45,'0',46,6
+50,'0',51,7
+60,'0',61,8
+70,'0',71,9
+75,'0',76,10
+85,'0',86,11
+95,'0',96,12
+====