You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/03/27 23:02:01 UTC

[impala] 07/17: IMPALA-11843: Fix IndexOutOfBoundsException in analytic limit pushdown

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-4.1.2
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4037f43f2e6a33706e2c5a18e7fdba95b69f8601
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Mon Jan 16 19:34:13 2023 +0800

    IMPALA-11843: Fix IndexOutOfBoundsException in analytic limit pushdown
    
    When finding analytic conjuncts for analytic limit pushdown, the
    following conditions are checked:
     - The conjunct should be a binary predicate
     - Left hand side is a SlotRef referencing the analytic expression, e.g.
       "rn" of "row_number() as rn"
     - The underlying analytic function is rank(), dense_rank() or row_number()
     - The window frame is UNBOUNDED PRECEDING to CURRENT ROW
     - Right hand side is a valid numeric limit
     - The op is =, <, or <=
    See more details in AnalyticPlanner.inferPartitionLimits().
    
    While checking the 2nd and 3rd condition, we get the source exprs of the
    SlotRef. The source exprs could be empty if the SlotRef is actually
    referencing a column of the table, i.e. a column materialized by the
    scan node. Currently, we check the first source expr directly regardless
    whether the list is empty, which causes the IndexOutOfBoundsException.
    
    This patch fixes it by augmenting the check to consider an empty list.
    Also fixes a similar code in AnalyticEvalNode.
    
    Tests:
     - Add FE and e2e regression tests
    
    Change-Id: I26d6bd58be58d09a29b8b81972e76665f41cf103
    Reviewed-on: http://gerrit.cloudera.org:8080/19422
    Reviewed-by: Aman Sinha <am...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../apache/impala/planner/AnalyticEvalNode.java    |  2 +-
 .../org/apache/impala/planner/AnalyticPlanner.java |  2 +-
 .../limit-pushdown-partitioned-top-n.test          | 32 ++++++++++++++++++++++
 .../queries/QueryTest/partitioned-top-n.test       | 12 ++++++++
 4 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java b/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java
index b0dd6987d..d4fb6abf4 100644
--- a/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java
@@ -587,7 +587,7 @@ public class AnalyticEvalNode extends PlanNode {
       return falseStatus;
     }
     List<Expr> lhsSourceExprs = ((SlotRef) lhs).getDesc().getSourceExprs();
-    if (lhsSourceExprs.size() > 1 ||
+    if (lhsSourceExprs.size() != 1 ||
           !(lhsSourceExprs.get(0) instanceof AnalyticExpr)) {
       return falseStatus;
     }
diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
index c070ea69f..0fc8b333f 100644
--- a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
@@ -910,7 +910,7 @@ public class AnalyticPlanner {
       if (!(lhs instanceof SlotRef)) continue;
 
       List<Expr> lhsSourceExprs = ((SlotRef) lhs).getDesc().getSourceExprs();
-      if (lhsSourceExprs.size() > 1 ||
+      if (lhsSourceExprs.size() != 1 ||
             !(lhsSourceExprs.get(0) instanceof AnalyticExpr)) {
         continue;
       }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
index 24c59d02f..3b9c65263 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
@@ -891,3 +891,35 @@ PLAN-ROOT SINK
    HDFS partitions=1/1 files=1 size=38B
    row-size=24B cardinality=unavailable
 ====
+# Regression test for IMPALA-11843. Several conjuncts to consider pushing down.
+select id, rn from (
+  select id,
+    row_number() over (order by id desc) rn,
+    max(id) over () max_id
+  from functional.alltypesagg) t
+where id = max_id and rn < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+04:SELECT
+|  predicates: id = max(id), id = max(id), row_number() < 10
+|  row-size=16B cardinality=1.10K
+|
+03:ANALYTIC
+|  functions: row_number()
+|  order by: id DESC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=16B cardinality=11.00K
+|
+02:ANALYTIC
+|  functions: max(id)
+|  row-size=8B cardinality=11.00K
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=11.00K
+|
+00:SCAN HDFS [functional.alltypesagg]
+   HDFS partitions=11/11 files=11 size=814.73KB
+   row-size=4B cardinality=11.00K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
index 7cfff846f..538d289f8 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
@@ -120,3 +120,15 @@ where rnk < 10
 ---- TYPES
 BIGINT
 ====
+---- QUERY
+select id, rn from (
+  select id,
+    row_number() over (order by id desc) rn,
+    max(id) over () max_id
+  from functional.alltypesagg) t
+where id = max_id and rn < 10
+---- RESULTS
+9999,1
+---- TYPES
+INT,BIGINT
+====