You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/08/08 23:37:22 UTC

[impala] 25/27: IMPALA-11443: Fix partitoned top-n with -1 input cardinality

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 57ac65871b5b31d18d6a055be54584722c805943
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Mon Jul 18 20:19:53 2022 +0200

    IMPALA-11443: Fix partitoned top-n with -1 input cardinality
    
    Creating a plan with partitioned top-n node with -1 input
    cardinality led to having negative resource estimate and
    hitting a precondtition, returning error:
    "IllegalStateException: Mem estimate must be set"
    
    This change adds handling for unknown cardinality.
    
    Testing:
    - added a planner test
    
    Change-Id: I7e2dc7df397dc4684e6510a200f381a74c8dd984
    Reviewed-on: http://gerrit.cloudera.org:8080/18744
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/planner/SortNode.java   |  2 +-
 .../org/apache/impala/planner/PlannerTest.java     |  4 ++-
 .../limit-pushdown-partitioned-top-n.test          | 37 ++++++++++++++++++++++
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/SortNode.java b/fe/src/main/java/org/apache/impala/planner/SortNode.java
index aa0b456d1..5f12dee45 100644
--- a/fe/src/main/java/org/apache/impala/planner/SortNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/SortNode.java
@@ -501,7 +501,7 @@ public class SortNode extends PlanNode {
           PARTIAL_SORT_MEM_LIMIT : (long) Math.ceil(fullInputSize / numInstances);
       perInstanceMinMemReservation = 3 * bufferSize * pageMultiplier;
 
-      if (type_ == TSortType.PARTITIONED_TOPN) {
+      if (type_ == TSortType.PARTITIONED_TOPN && cardinality_ >= 0) {
         // We may be able to estimate a lower memory requirement based on the size
         // of in-memory heaps.
         long totalHeapBytes = getSortInfo().estimateMaterializedSize(cardinality_);
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index fd5af1e82..7a7e60ebe 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -1248,7 +1248,9 @@ public class PlannerTest extends PlannerTestBase {
    */
   @Test
   public void testLimitPushdownPartitionedTopN() {
-    runPlannerTestFile("limit-pushdown-partitioned-top-n");
+    TQueryOptions options = defaultQueryOptions();
+    options.setDisable_hdfs_num_rows_estimate(true); // Needed to test IMPALA-11443.
+    runPlannerTestFile("limit-pushdown-partitioned-top-n", options);
   }
 
   /**
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
index 167932190..24c59d02f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
@@ -854,3 +854,40 @@ PLAN-ROOT SINK
    HDFS partitions=11/11 files=11 size=814.73KB
    row-size=14B cardinality=11.00K
 ====
+# Regression test for IMPALA-11443. Tests partitioned top-n with -1 cardinality.
+# Relies on functional.tinytable not having stats and running this test with
+# DISABLE_HDFS_NUM_ROWS_ESTIMATE=1
+select a, b, rn from (
+  select *, row_number() over (partition by a order by b) as rn
+  from functional.tinytable) v
+where rn <= 5
+order by a, rn;
+---- PLAN
+PLAN-ROOT SINK
+|
+04:SORT
+|  order by: a ASC, rn ASC
+|  row-size=32B cardinality=unavailable
+|
+03:SELECT
+|  predicates: row_number() <= 5
+|  row-size=32B cardinality=unavailable
+|
+02:ANALYTIC
+|  functions: row_number()
+|  partition by: a
+|  order by: b ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=32B cardinality=unavailable
+|
+01:TOP-N
+|  partition by: a
+|  order by: b ASC
+|  partition limit: 5
+|  source expr: row_number() <= CAST(5 AS BIGINT)
+|  row-size=24B cardinality=unavailable
+|
+00:SCAN HDFS [functional.tinytable]
+   HDFS partitions=1/1 files=1 size=38B
+   row-size=24B cardinality=unavailable
+====