You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/09/27 09:51:46 UTC
[impala] 03/07: IMPALA-7560: Set selectivity of Not-equal
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch branch-4.0.1
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8862719d87ac5dc214985025463f002d41b15672
Author: liuyao <54...@163.com>
AuthorDate: Mon Apr 26 14:37:43 2021 +0800
IMPALA-7560: Set selectivity of Not-equal
Calculate binary predicate selectivity if one of the children is
a slotref and the other children are all constant.
eg. something like "col != 5", but not "2 * col != 10"
selectivity = 1 - 1/ndv
Testing:
Modify the function testNeSelectivity() of the
ExprCardinalityTest.java, change -1 to the correct value.
Change-Id: Icd6f5945840ea2a8194d72aa440ddfa6915cbb3a
Reviewed-on: http://gerrit.cloudera.org:8080/17344
Reviewed-by: Qifan Chen <qc...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
---
.../apache/impala/analysis/BinaryPredicate.java | 41 +++++++++++--
.../impala/analysis/ExprCardinalityTest.java | 71 +++++++++-------------
.../org/apache/impala/planner/CardinalityTest.java | 4 +-
.../queries/PlannerTest/analytic-fns.test | 4 +-
.../queries/PlannerTest/card-scan.test | 4 +-
.../queries/PlannerTest/hbase.test | 8 +--
.../queries/PlannerTest/inline-view-limit.test | 4 +-
.../queries/PlannerTest/kudu.test | 4 +-
.../queries/PlannerTest/predicate-propagation.test | 8 +--
.../queries/PlannerTest/tpch-all.test | 50 +++++++--------
.../queries/PlannerTest/tpch-kudu.test | 2 +-
.../queries/PlannerTest/tpch-nested.test | 8 +--
.../queries/PlannerTest/tpch-views.test | 2 +-
13 files changed, 110 insertions(+), 100 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
index 9092058..83ab86c 100644
--- a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
+++ b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
@@ -22,6 +22,7 @@ import java.util.Collections;
import java.util.List;
import org.apache.impala.catalog.Db;
+import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.Function.CompareMode;
import org.apache.impala.catalog.ScalarFunction;
import org.apache.impala.catalog.Type;
@@ -237,14 +238,42 @@ public class BinaryPredicate extends Predicate {
// TODO: Compute selectivity for nested predicates.
// TODO: Improve estimation using histograms.
Reference<SlotRef> slotRefRef = new Reference<SlotRef>();
- if ((op_ == Operator.EQ || op_ == Operator.NOT_DISTINCT)
- && isSingleColumnPredicate(slotRefRef, null)) {
- long distinctValues = slotRefRef.getRef().getNumDistinctValues();
- if (distinctValues > 0) {
- selectivity_ = 1.0 / distinctValues;
- selectivity_ = Math.max(0, Math.min(1, selectivity_));
+ if (!isSingleColumnPredicate(slotRefRef, null)) {
+ return;
+ }
+ long distinctValues = slotRefRef.getRef().getNumDistinctValues();
+ if (distinctValues < 0) {
+ // Lack of statistics to estimate the selectivity.
+ return;
+ } else if (distinctValues == 0 && (op_ == Operator.EQ || op_ == Operator.NE)) {
+ // If the table is empty, then distinctValues is 0. This case is equivalent
+ // to selectivity is 0.
+ selectivity_ = 0.0;
+ return;
+ }
+
+ if (op_ == Operator.EQ || op_ == Operator.NOT_DISTINCT) {
+ selectivity_ = 1.0 / distinctValues;
+ } else if (op_ == Operator.NE || op_ == Operator.DISTINCT_FROM) {
+ // For case <column> IS DISTINCT FROM NULL, all non-null values are true
+ if (Expr.IS_NULL_LITERAL.apply(getChild(1)) && op_ == Operator.DISTINCT_FROM) {
+ selectivity_ = 1.0;
+ } else {
+ selectivity_ = 1.0 - 1.0 / distinctValues;
}
+ SlotDescriptor slotDesc = slotRefRef.getRef().getDesc();
+ if (slotDesc.getStats().hasNullsStats()) {
+ FeTable table = slotDesc.getParent().getTable();
+ if (table != null && table.getNumRows() > 0) {
+ long numRows = table.getNumRows();
+ selectivity_ *=
+ (double) (numRows - slotDesc.getStats().getNumNulls()) / numRows;
+ }
+ }
+ } else {
+ return;
}
+ selectivity_ = Math.max(0, Math.min(1, selectivity_));
}
@Override
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
index 061e4a9..efd0378 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
@@ -314,23 +314,16 @@ public class ExprCardinalityTest {
*/
@Test
public void testNeSelectivity() throws ImpalaException {
- // Bug: No estimated selectivity for != (IMPALA-8039)
- //verifySelectExpr("alltypes", "id != 10", 3, 1 - 1.0/7300);
- verifySelectExpr("alltypes", "id != 10", 3, -1);
- //verifySelectExpr("alltypes", "bool_col != true", 3, 1 - 1.0/2);
- verifySelectExpr("alltypes", "bool_col != true", 3, -1);
- //verifySelectExpr("alltypes", "int_col != 10", 3, 1 - 1.0/10);
- verifySelectExpr("alltypes", "int_col != 10", 3, -1);
-
- //verifySelectExpr("nullrows", "id != 'foo'", 3, 1 - 1.0/26);
- verifySelectExpr("nullrows", "id != 'foo'", 3, -1);
- // Bug: All nulls, so NDV should = 1, so Sel should be 1 - 1.0/1
- //verifySelectExpr("nullrows", "null_str != 'foo'", 3, 1 - 1.0/1);
- verifySelectExpr("nullrows", "null_str != 'foo'", 3, -1);
- //verifySelectExpr("nullrows", "group_str != 'foo'", 3, 1 - 1.0/6);
- verifySelectExpr("nullrows", "group_str != 'foo'", 3, -1);
- //verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, 1 - 1.0/7);
- verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, -1);
+ verifySelectExpr("alltypes", "id != 10", 3, 1 - 1.0/7300);
+ verifySelectExpr("alltypes", "bool_col != true", 3, 1 - 1.0/2);
+ verifySelectExpr("alltypes", "int_col != 10", 3, 1 - 1.0/10);
+ verifySelectExpr("nullrows", "id != 'foo'", 3, 1 - 1.0/26);
+ verifySelectExpr("nullrows", "null_str != 'foo'", 3, 1 - 1.0/1);
+ verifySelectExpr("nullrows", "group_str != 'foo'", 3, 1 - 1.0/6);
+ verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, (1 - 1.0/6)*6/26);
+ // field has no statistics.
+ verifySelectExpr("emptytable", "field != 'foo'", 3, -1);
+ verifySelectExpr("emptytable", "f2 != 10", 3, 0.0);
// Bug: Sel should default to 1 - good old 0.1
verifySelectExpr("manynulls", "id != 10", 3, -1);
@@ -343,36 +336,28 @@ public class ExprCardinalityTest {
*/
@Test
public void testDistinctSelectivity() throws ImpalaException {
- // BUG: IS DISTINCT has no selectivity
- //verifySelectExpr("alltypes", "id is distinct from 10", 3, 1 - 1.0/7300);
- verifySelectExpr("alltypes", "id is distinct from 10", 3, -1);
+
+ verifySelectExpr("alltypes", "id is distinct from 10", 3, 1 - 1.0/7300);
// Bug: does not treat NULL specially
// Bug: NDV sould be 2 since IS DISTINCT won't return NULL
//verifySelectExpr("alltypes", "id is distinct from null", 2, 1);
- verifySelectExpr("alltypes", "id is distinct from null", 3, -1);
- //verifySelectExpr("alltypes", "bool_col is distinct from true", 3, 1 - 1.0/2);
- verifySelectExpr("alltypes", "bool_col is distinct from true", 3, -1);
- //verifySelectExpr("alltypes", "bool_col is distinct from null", 2, 1);
- verifySelectExpr("alltypes", "bool_col is distinct from null", 3, -1);
- //verifySelectExpr("alltypes", "int_col is distinct from 10", 3, 1 - 1.0/10);
- verifySelectExpr("alltypes", "int_col is distinct from 10", 3, -1);
- //verifySelectExpr("alltypes", "int_col is distinct from null", 2, 1);
- verifySelectExpr("alltypes", "int_col is distinct from null", 3, -1);
-
- //verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, 1 - 1.0/26);
- verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, -1);
- //verifySelectExpr("nullrows", "id is distinct from null", 2, 1);
- verifySelectExpr("nullrows", "id is distinct from null", 3, -1);
- // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1
- //verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 2, 1 - 1.0/1);
- verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 3, -1);
- verifySelectExpr("nullrows", "null_str is distinct from null", 3, -1);
- //verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, 1 - 1.0/6);
- verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, -1);
- //verifySelectExpr("nullrows", "group_str is distinct from null", 2, 0);
- verifySelectExpr("nullrows", "group_str is distinct from null", 3, -1);
- //verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1 - 1.0/7);
+ verifySelectExpr("alltypes", "id is distinct from null", 3, 1);
+ verifySelectExpr("alltypes", "bool_col is distinct from true", 3, 1 - 1.0/2);
+
+ verifySelectExpr("alltypes", "bool_col is distinct from null", 3, 1);
+ verifySelectExpr("alltypes", "int_col is distinct from 10", 3, 1 - 1.0/10);
+
+ verifySelectExpr("alltypes", "int_col is distinct from null", 3, 1);
+ verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, 1 - 1.0/26);
+
+ verifySelectExpr("nullrows", "id is distinct from null", 3, 1);
+ verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 3, 1 - 1.0/1);
+ verifySelectExpr("nullrows", "null_str is distinct from null", 3, 0);
+ verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, 1 - 1.0/6);
+ verifySelectExpr("nullrows", "group_str is distinct from null", 3, 1);
+ verifySelectExpr("nullrows", "group_str is distinct from null", 3, 1);
verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1.0/6);
+ verifySelectExpr("nullrows", "some_nulls is distinct from null", 3, 6.0/26.0);
// Bug: Sel should default to 1 - good old 0.1
verifySelectExpr("manynulls", "id is distinct from 10", 3, -1);
diff --git a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
index f634b28..91fc296 100644
--- a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
@@ -67,10 +67,8 @@ public class CardinalityTest extends PlannerTestBase {
verifyCardinality(
"SELECT id FROM functional.alltypes WHERE int_col = 1", 7300/10);
- // Assume classic 0.1 selectivity for other operators
- // IMPALA-7560 says this should be revised.
verifyCardinality(
- "SELECT id FROM functional.alltypes WHERE int_col != 1", 730);
+ "SELECT id FROM functional.alltypes WHERE int_col != 1", 6570);
// IMPALA-7601 says the following should be revised.
verifyCardinality(
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index 593ac72..a01b401 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -2086,7 +2086,7 @@ PLAN-ROOT SINK
|
05:SELECT
| predicates: id IS NULL, tinyint_col != 5
-| row-size=9B cardinality=730
+| row-size=9B cardinality=699
|
00:UNION
| row-size=9B cardinality=7.30K
@@ -2124,7 +2124,7 @@ PLAN-ROOT SINK
|
05:SELECT
| predicates: id IS NULL, tinyint_col != 5
-| row-size=9B cardinality=730
+| row-size=9B cardinality=699
|
00:UNION
| row-size=9B cardinality=7.30K
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
index cf4c316..20aee39 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
@@ -156,8 +156,6 @@ PLAN-ROOT SINK
# Not-equal, card = 1 - 1/ndv
# Use smaller table so effect is clear
# |T|=8, NDV=8
-# Bug: IMPALA-8039
-# Bug, expected cardinality ~7
select *
from functional.alltypestiny
where id != 10
@@ -167,7 +165,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
predicates: id != 10
- row-size=89B cardinality=1
+ row-size=89B cardinality=7
====
# Inequality. No useful stats.
# Bug: IMPALA-8037, Assumes sel = 0.1
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
index 5a26b0f..f829753 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
@@ -411,10 +411,10 @@ PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypessmall]
hbase filters:
- d:string_col NOT_EQUAL '2'
d:string_col GREATER_OR_EQUAL '4'
d:date_string_col EQUAL '04/03/09'
- predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09'
+ d:string_col NOT_EQUAL '2'
+ predicates: string_col >= '4', date_string_col = '04/03/09', string_col != '2'
row-size=89B cardinality=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
@@ -423,10 +423,10 @@ PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypessmall]
hbase filters:
- d:string_col NOT_EQUAL '2'
d:string_col GREATER_OR_EQUAL '4'
d:date_string_col EQUAL '04/03/09'
- predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09'
+ d:string_col NOT_EQUAL '2'
+ predicates: string_col >= '4', date_string_col = '04/03/09', string_col != '2'
row-size=89B cardinality=1
====
# mix of predicates and functional_hbase. filters
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
index 266d528..514bf0d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
@@ -708,7 +708,7 @@ PLAN-ROOT SINK
|
02:SCAN HDFS [functional.alltypessmall]
partitions=4/4 files=4 size=6.32KB
- predicates: functional.alltypessmall.id != 1, functional.alltypessmall.id > 10, functional.alltypessmall.id > 20, id != 2
+ predicates: functional.alltypessmall.id > 10, functional.alltypessmall.id > 20, functional.alltypessmall.id != 1, id != 2
runtime filters: RF000 -> id
row-size=4B cardinality=10
====
@@ -772,7 +772,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
- predicates: functional.alltypes.id != 2, functional.alltypes.id > 10, functional.alltypes.id > 20, id != 1
+ predicates: functional.alltypes.id > 10, functional.alltypes.id > 20, functional.alltypes.id != 2, id != 1
runtime filters: RF000 -> id
row-size=4B cardinality=730
====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index 824188a..6bdcc41 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -143,7 +143,7 @@ and zip > 1 and zip < 50
PLAN-ROOT SINK
|
00:SCAN KUDU [functional_kudu.testtbl]
- kudu predicates: id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0, name = 'foo'
+ kudu predicates: name = 'foo', id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0
row-size=28B cardinality=0
---- SCANRANGELOCATIONS
NODE 0:
@@ -154,7 +154,7 @@ PLAN-ROOT SINK
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN KUDU [functional_kudu.testtbl]
- kudu predicates: id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0, name = 'foo'
+ kudu predicates: name = 'foo', id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0
row-size=28B cardinality=0
====
# Constant propagation works for Kudu
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
index b977b98..74ff2b4 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
@@ -585,7 +585,7 @@ PLAN-ROOT SINK
| 01:SCAN HDFS [functional.alltypessmall]
| partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2
| HDFS partitions=2/4 files=2 size=3.16KB
-| predicates: functional.alltypessmall.int_col != 5, id > 11
+| predicates: id > 11, functional.alltypessmall.int_col != 5
| row-size=16B cardinality=5
|
00:SCAN HDFS [functional.alltypes a]
@@ -629,7 +629,7 @@ PLAN-ROOT SINK
| 01:SCAN HDFS [functional.alltypessmall]
| partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2
| HDFS partitions=2/4 files=2 size=3.16KB
-| predicates: functional.alltypessmall.int_col != 5, id > 11
+| predicates: id > 11, functional.alltypessmall.int_col != 5
| row-size=16B cardinality=5
|
00:SCAN HDFS [functional.alltypes a]
@@ -672,7 +672,7 @@ PLAN-ROOT SINK
| 01:SCAN HDFS [functional.alltypessmall]
| partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2
| HDFS partitions=2/4 files=2 size=3.16KB
-| predicates: functional.alltypessmall.int_col != 5, id > 11
+| predicates: id > 11, functional.alltypessmall.int_col != 5
| row-size=16B cardinality=5
|
00:SCAN HDFS [functional.alltypes a]
@@ -709,7 +709,7 @@ PLAN-ROOT SINK
| 01:SCAN HDFS [functional.alltypessmall]
| partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2
| HDFS partitions=2/4 files=2 size=3.16KB
-| predicates: functional.alltypessmall.int_col != 5, id > 11
+| predicates: id > 11, functional.alltypessmall.int_col != 5
| row-size=16B cardinality=5
|
00:SCAN HDFS [functional.alltypes a]
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
index 917e5c1..e54f6a3 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
@@ -3453,20 +3453,20 @@ PLAN-ROOT SINK
|
07:SORT
| order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
06:AGGREGATE [FINALIZE]
| output: count(ps_suppkey)
| group by: p_brand, p_type, p_size
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
05:AGGREGATE
| group by: p_brand, p_type, p_size, ps_suppkey
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
04:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: ps_suppkey = s_suppkey
-| row-size=81B cardinality=31.92K
+| row-size=81B cardinality=31.49K
|
|--02:SCAN HDFS [tpch.supplier]
| HDFS partitions=1/1 files=1 size=1.33MB
@@ -3476,12 +3476,12 @@ PLAN-ROOT SINK
03:HASH JOIN [INNER JOIN]
| hash predicates: ps_partkey = p_partkey
| runtime filters: RF000 <- p_partkey
-| row-size=81B cardinality=31.92K
+| row-size=81B cardinality=31.49K
|
|--01:SCAN HDFS [tpch.part]
| HDFS partitions=1/1 files=1 size=22.83MB
-| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-| row-size=65B cardinality=8.00K
+| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+| row-size=65B cardinality=7.89K
|
00:SCAN HDFS [tpch.partsupp]
HDFS partitions=1/1 files=1 size=112.71MB
@@ -3497,33 +3497,33 @@ PLAN-ROOT SINK
|
07:SORT
| order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
13:AGGREGATE [FINALIZE]
| output: count:merge(ps_suppkey)
| group by: p_brand, p_type, p_size
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
12:EXCHANGE [HASH(p_brand,p_type,p_size)]
|
06:AGGREGATE [STREAMING]
| output: count(ps_suppkey)
| group by: p_brand, p_type, p_size
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
11:AGGREGATE
| group by: p_brand, p_type, p_size, ps_suppkey
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
10:EXCHANGE [HASH(p_brand,p_type,p_size,ps_suppkey)]
|
05:AGGREGATE [STREAMING]
| group by: p_brand, p_type, p_size, ps_suppkey
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
04:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST]
| hash predicates: ps_suppkey = s_suppkey
-| row-size=81B cardinality=31.92K
+| row-size=81B cardinality=31.49K
|
|--09:EXCHANGE [BROADCAST]
| |
@@ -3535,14 +3535,14 @@ PLAN-ROOT SINK
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: ps_partkey = p_partkey
| runtime filters: RF000 <- p_partkey
-| row-size=81B cardinality=31.92K
+| row-size=81B cardinality=31.49K
|
|--08:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch.part]
| HDFS partitions=1/1 files=1 size=22.83MB
-| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-| row-size=65B cardinality=8.00K
+| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+| row-size=65B cardinality=7.89K
|
00:SCAN HDFS [tpch.partsupp]
HDFS partitions=1/1 files=1 size=112.71MB
@@ -3558,33 +3558,33 @@ PLAN-ROOT SINK
|
07:SORT
| order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
13:AGGREGATE [FINALIZE]
| output: count:merge(ps_suppkey)
| group by: p_brand, p_type, p_size
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
12:EXCHANGE [HASH(p_brand,p_type,p_size)]
|
06:AGGREGATE [STREAMING]
| output: count(ps_suppkey)
| group by: p_brand, p_type, p_size
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
11:AGGREGATE
| group by: p_brand, p_type, p_size, ps_suppkey
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
10:EXCHANGE [HASH(p_brand,p_type,p_size,ps_suppkey)]
|
05:AGGREGATE [STREAMING]
| group by: p_brand, p_type, p_size, ps_suppkey
-| row-size=65B cardinality=31.92K
+| row-size=65B cardinality=31.49K
|
04:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST]
| hash predicates: ps_suppkey = s_suppkey
-| row-size=81B cardinality=31.92K
+| row-size=81B cardinality=31.49K
|
|--JOIN BUILD
| | join-table-id=00 plan-id=01 cohort-id=01
@@ -3599,7 +3599,7 @@ PLAN-ROOT SINK
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: ps_partkey = p_partkey
-| row-size=81B cardinality=31.92K
+| row-size=81B cardinality=31.49K
|
|--JOIN BUILD
| | join-table-id=01 plan-id=02 cohort-id=01
@@ -3610,8 +3610,8 @@ PLAN-ROOT SINK
| |
| 01:SCAN HDFS [tpch.part]
| HDFS partitions=1/1 files=1 size=22.83MB
-| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-| row-size=65B cardinality=8.00K
+| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+| row-size=65B cardinality=7.89K
|
00:SCAN HDFS [tpch.partsupp]
HDFS partitions=1/1 files=1 size=112.71MB
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
index b5f85ab..b851b4a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
@@ -1228,7 +1228,7 @@ PLAN-ROOT SINK
| row-size=89B cardinality=31.92K
|
|--01:SCAN KUDU [tpch_kudu.part]
-| predicates: p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
+| predicates: NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
| kudu predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9)
| row-size=73B cardinality=8.00K
|
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
index 94759c8..c5a158d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
@@ -2307,8 +2307,8 @@ PLAN-ROOT SINK
|
|--05:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
-| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-| row-size=65B cardinality=8.00K
+| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+| row-size=65B cardinality=7.89K
|
01:SUBPLAN
| row-size=103B cardinality=10.00K
@@ -2368,8 +2368,8 @@ PLAN-ROOT SINK
| |
| 05:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
-| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-| row-size=65B cardinality=8.00K
+| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+| row-size=65B cardinality=7.89K
|
01:SUBPLAN
| row-size=103B cardinality=10.00K
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
index adc56aa..ff599c0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
@@ -1222,7 +1222,7 @@ PLAN-ROOT SINK
|
|--01:SCAN HDFS [tpch.part]
| HDFS partitions=1/1 files=1 size=22.83MB
-| predicates: tpch.part.p_size IN (49, 14, 23, 45, 19, 3, 36, 9), tpch.part.p_brand != 'Brand#45', NOT tpch.part.p_type LIKE 'MEDIUM POLISHED%'
+| predicates: tpch.part.p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT tpch.part.p_type LIKE 'MEDIUM POLISHED%', tpch.part.p_brand != 'Brand#45'
| row-size=65B cardinality=8.00K
|
00:SCAN HDFS [tpch.partsupp]