You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/08/08 23:37:22 UTC
[impala] 25/27: IMPALA-11443: Fix partitoned top-n with -1 input cardinality
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 57ac65871b5b31d18d6a055be54584722c805943
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Mon Jul 18 20:19:53 2022 +0200
IMPALA-11443: Fix partitoned top-n with -1 input cardinality
Creating a plan with partitioned top-n node with -1 input
cardinality led to having negative resource estimate and
hitting a precondtition, returning error:
"IllegalStateException: Mem estimate must be set"
This change adds handling for unknown cardinality.
Testing:
- added a planner test
Change-Id: I7e2dc7df397dc4684e6510a200f381a74c8dd984
Reviewed-on: http://gerrit.cloudera.org:8080/18744
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../java/org/apache/impala/planner/SortNode.java | 2 +-
.../org/apache/impala/planner/PlannerTest.java | 4 ++-
.../limit-pushdown-partitioned-top-n.test | 37 ++++++++++++++++++++++
3 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/planner/SortNode.java b/fe/src/main/java/org/apache/impala/planner/SortNode.java
index aa0b456d1..5f12dee45 100644
--- a/fe/src/main/java/org/apache/impala/planner/SortNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/SortNode.java
@@ -501,7 +501,7 @@ public class SortNode extends PlanNode {
PARTIAL_SORT_MEM_LIMIT : (long) Math.ceil(fullInputSize / numInstances);
perInstanceMinMemReservation = 3 * bufferSize * pageMultiplier;
- if (type_ == TSortType.PARTITIONED_TOPN) {
+ if (type_ == TSortType.PARTITIONED_TOPN && cardinality_ >= 0) {
// We may be able to estimate a lower memory requirement based on the size
// of in-memory heaps.
long totalHeapBytes = getSortInfo().estimateMaterializedSize(cardinality_);
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index fd5af1e82..7a7e60ebe 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -1248,7 +1248,9 @@ public class PlannerTest extends PlannerTestBase {
*/
@Test
public void testLimitPushdownPartitionedTopN() {
- runPlannerTestFile("limit-pushdown-partitioned-top-n");
+ TQueryOptions options = defaultQueryOptions();
+ options.setDisable_hdfs_num_rows_estimate(true); // Needed to test IMPALA-11443.
+ runPlannerTestFile("limit-pushdown-partitioned-top-n", options);
}
/**
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
index 167932190..24c59d02f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
@@ -854,3 +854,40 @@ PLAN-ROOT SINK
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
+# Regression test for IMPALA-11443. Tests partitioned top-n with -1 cardinality.
+# Relies on functional.tinytable not having stats and running this test with
+# DISABLE_HDFS_NUM_ROWS_ESTIMATE=1
+select a, b, rn from (
+ select *, row_number() over (partition by a order by b) as rn
+ from functional.tinytable) v
+where rn <= 5
+order by a, rn;
+---- PLAN
+PLAN-ROOT SINK
+|
+04:SORT
+| order by: a ASC, rn ASC
+| row-size=32B cardinality=unavailable
+|
+03:SELECT
+| predicates: row_number() <= 5
+| row-size=32B cardinality=unavailable
+|
+02:ANALYTIC
+| functions: row_number()
+| partition by: a
+| order by: b ASC
+| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+| row-size=32B cardinality=unavailable
+|
+01:TOP-N
+| partition by: a
+| order by: b ASC
+| partition limit: 5
+| source expr: row_number() <= CAST(5 AS BIGINT)
+| row-size=24B cardinality=unavailable
+|
+00:SCAN HDFS [functional.tinytable]
+ HDFS partitions=1/1 files=1 size=38B
+ row-size=24B cardinality=unavailable
+====