You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/09/27 09:51:46 UTC

[impala] 03/07: IMPALA-7560: Set selectivity of Not-equal

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-4.0.1
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8862719d87ac5dc214985025463f002d41b15672
Author: liuyao <54...@163.com>
AuthorDate: Mon Apr 26 14:37:43 2021 +0800

    IMPALA-7560: Set selectivity of Not-equal
    
    Calculate binary predicate selectivity if one of the children is
    a slotref and the other children are all constant.
    eg. something like "col != 5", but not "2 * col != 10"
    
    selectivity = 1 - 1/ndv
    
    Testing:
    Modify the function testNeSelectivity() of the
    ExprCardinalityTest.java, change -1 to the correct value.
    
    Change-Id: Icd6f5945840ea2a8194d72aa440ddfa6915cbb3a
    Reviewed-on: http://gerrit.cloudera.org:8080/17344
    Reviewed-by: Qifan Chen <qc...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
---
 .../apache/impala/analysis/BinaryPredicate.java    | 41 +++++++++++--
 .../impala/analysis/ExprCardinalityTest.java       | 71 +++++++++-------------
 .../org/apache/impala/planner/CardinalityTest.java |  4 +-
 .../queries/PlannerTest/analytic-fns.test          |  4 +-
 .../queries/PlannerTest/card-scan.test             |  4 +-
 .../queries/PlannerTest/hbase.test                 |  8 +--
 .../queries/PlannerTest/inline-view-limit.test     |  4 +-
 .../queries/PlannerTest/kudu.test                  |  4 +-
 .../queries/PlannerTest/predicate-propagation.test |  8 +--
 .../queries/PlannerTest/tpch-all.test              | 50 +++++++--------
 .../queries/PlannerTest/tpch-kudu.test             |  2 +-
 .../queries/PlannerTest/tpch-nested.test           |  8 +--
 .../queries/PlannerTest/tpch-views.test            |  2 +-
 13 files changed, 110 insertions(+), 100 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
index 9092058..83ab86c 100644
--- a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
+++ b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
@@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.impala.catalog.Db;
+import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.Function.CompareMode;
 import org.apache.impala.catalog.ScalarFunction;
 import org.apache.impala.catalog.Type;
@@ -237,14 +238,42 @@ public class BinaryPredicate extends Predicate {
     // TODO: Compute selectivity for nested predicates.
     // TODO: Improve estimation using histograms.
     Reference<SlotRef> slotRefRef = new Reference<SlotRef>();
-    if ((op_ == Operator.EQ || op_ == Operator.NOT_DISTINCT)
-        && isSingleColumnPredicate(slotRefRef, null)) {
-      long distinctValues = slotRefRef.getRef().getNumDistinctValues();
-      if (distinctValues > 0) {
-        selectivity_ = 1.0 / distinctValues;
-        selectivity_ = Math.max(0, Math.min(1, selectivity_));
+    if (!isSingleColumnPredicate(slotRefRef, null)) {
+      return;
+    }
+    long distinctValues = slotRefRef.getRef().getNumDistinctValues();
+    if (distinctValues < 0) {
+      // Lack of statistics to estimate the selectivity.
+      return;
+    } else if (distinctValues == 0 && (op_ == Operator.EQ || op_ == Operator.NE)) {
+      // If the table is empty, then distinctValues is 0. This case is equivalent
+      // to selectivity is 0.
+      selectivity_ = 0.0;
+      return;
+    }
+
+    if (op_ == Operator.EQ || op_ == Operator.NOT_DISTINCT) {
+      selectivity_ = 1.0 / distinctValues;
+    } else if (op_ == Operator.NE || op_ == Operator.DISTINCT_FROM) {
+      // For case <column> IS DISTINCT FROM NULL, all non-null values are true
+      if (Expr.IS_NULL_LITERAL.apply(getChild(1)) && op_ == Operator.DISTINCT_FROM) {
+        selectivity_ = 1.0;
+      } else {
+        selectivity_ = 1.0 - 1.0 / distinctValues;
       }
+      SlotDescriptor slotDesc = slotRefRef.getRef().getDesc();
+      if (slotDesc.getStats().hasNullsStats()) {
+        FeTable table = slotDesc.getParent().getTable();
+        if (table != null && table.getNumRows() > 0) {
+          long numRows = table.getNumRows();
+          selectivity_ *=
+              (double) (numRows - slotDesc.getStats().getNumNulls()) / numRows;
+        }
+      }
+    } else {
+      return;
     }
+    selectivity_ = Math.max(0, Math.min(1, selectivity_));
   }
 
   @Override
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
index 061e4a9..efd0378 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
@@ -314,23 +314,16 @@ public class ExprCardinalityTest {
    */
   @Test
   public void testNeSelectivity() throws ImpalaException {
-    // Bug: No estimated selectivity for != (IMPALA-8039)
-    //verifySelectExpr("alltypes", "id != 10", 3, 1 - 1.0/7300);
-    verifySelectExpr("alltypes", "id != 10", 3, -1);
-    //verifySelectExpr("alltypes", "bool_col != true", 3, 1 - 1.0/2);
-    verifySelectExpr("alltypes", "bool_col != true", 3, -1);
-    //verifySelectExpr("alltypes", "int_col != 10", 3, 1 - 1.0/10);
-    verifySelectExpr("alltypes", "int_col != 10", 3, -1);
-
-    //verifySelectExpr("nullrows", "id != 'foo'", 3, 1 - 1.0/26);
-    verifySelectExpr("nullrows", "id != 'foo'", 3, -1);
-    // Bug: All nulls, so NDV should = 1, so Sel should be 1 - 1.0/1
-    //verifySelectExpr("nullrows", "null_str != 'foo'", 3, 1 - 1.0/1);
-    verifySelectExpr("nullrows", "null_str != 'foo'", 3, -1);
-    //verifySelectExpr("nullrows", "group_str != 'foo'", 3, 1 - 1.0/6);
-    verifySelectExpr("nullrows", "group_str != 'foo'", 3, -1);
-    //verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, 1 - 1.0/7);
-    verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, -1);
+    verifySelectExpr("alltypes", "id != 10", 3, 1 - 1.0/7300);
+    verifySelectExpr("alltypes", "bool_col != true", 3, 1 - 1.0/2);
+    verifySelectExpr("alltypes", "int_col != 10", 3, 1 - 1.0/10);
+    verifySelectExpr("nullrows", "id != 'foo'", 3, 1 - 1.0/26);
+    verifySelectExpr("nullrows", "null_str != 'foo'", 3, 1 - 1.0/1);
+    verifySelectExpr("nullrows", "group_str != 'foo'", 3, 1 - 1.0/6);
+    verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, (1 - 1.0/6)*6/26);
+    // field has no statistics.
+    verifySelectExpr("emptytable", "field != 'foo'", 3, -1);
+    verifySelectExpr("emptytable", "f2 != 10", 3, 0.0);
 
     // Bug: Sel should default to 1 - good old 0.1
     verifySelectExpr("manynulls", "id != 10", 3, -1);
@@ -343,36 +336,28 @@ public class ExprCardinalityTest {
    */
   @Test
   public void testDistinctSelectivity() throws ImpalaException {
-    // BUG: IS DISTINCT has no selectivity
-    //verifySelectExpr("alltypes", "id is distinct from 10", 3, 1 - 1.0/7300);
-    verifySelectExpr("alltypes", "id is distinct from 10", 3, -1);
+
+    verifySelectExpr("alltypes", "id is distinct from 10", 3, 1 - 1.0/7300);
     // Bug: does not treat NULL specially
     // Bug: NDV sould be 2 since IS DISTINCT won't return NULL
     //verifySelectExpr("alltypes", "id is distinct from null", 2, 1);
-    verifySelectExpr("alltypes", "id is distinct from null", 3, -1);
-    //verifySelectExpr("alltypes", "bool_col is distinct from true", 3, 1 - 1.0/2);
-    verifySelectExpr("alltypes", "bool_col is distinct from true", 3, -1);
-    //verifySelectExpr("alltypes", "bool_col is distinct from null", 2, 1);
-    verifySelectExpr("alltypes", "bool_col is distinct from null", 3, -1);
-    //verifySelectExpr("alltypes", "int_col is distinct from 10", 3, 1 - 1.0/10);
-    verifySelectExpr("alltypes", "int_col is distinct from 10", 3, -1);
-    //verifySelectExpr("alltypes", "int_col is distinct from null", 2, 1);
-    verifySelectExpr("alltypes", "int_col is distinct from null", 3, -1);
-
-    //verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, 1 - 1.0/26);
-    verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, -1);
-    //verifySelectExpr("nullrows", "id is distinct from null", 2, 1);
-    verifySelectExpr("nullrows", "id is distinct from null", 3, -1);
-    // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1
-    //verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 2, 1 - 1.0/1);
-    verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 3, -1);
-    verifySelectExpr("nullrows", "null_str is distinct from null", 3, -1);
-    //verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, 1 - 1.0/6);
-    verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, -1);
-    //verifySelectExpr("nullrows", "group_str is distinct from null", 2, 0);
-    verifySelectExpr("nullrows", "group_str is distinct from null", 3, -1);
-    //verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1 - 1.0/7);
+    verifySelectExpr("alltypes", "id is distinct from null", 3, 1);
+    verifySelectExpr("alltypes", "bool_col is distinct from true", 3, 1 - 1.0/2);
+
+    verifySelectExpr("alltypes", "bool_col is distinct from null", 3, 1);
+    verifySelectExpr("alltypes", "int_col is distinct from 10", 3, 1 - 1.0/10);
+
+    verifySelectExpr("alltypes", "int_col is distinct from null", 3, 1);
+    verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, 1 - 1.0/26);
+
+    verifySelectExpr("nullrows", "id is distinct from null", 3, 1);
+    verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 3, 1 - 1.0/1);
+    verifySelectExpr("nullrows", "null_str is distinct from null", 3, 0);
+    verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, 1 - 1.0/6);
+    verifySelectExpr("nullrows", "group_str is distinct from null", 3, 1);
+    verifySelectExpr("nullrows", "group_str is distinct from null", 3, 1);
     verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 1.0/6);
+    verifySelectExpr("nullrows", "some_nulls is distinct from null", 3, 6.0/26.0);
 
     // Bug: Sel should default to 1 - good old 0.1
     verifySelectExpr("manynulls", "id is distinct from 10", 3, -1);
diff --git a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
index f634b28..91fc296 100644
--- a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
@@ -67,10 +67,8 @@ public class CardinalityTest extends PlannerTestBase {
     verifyCardinality(
         "SELECT id FROM functional.alltypes WHERE int_col = 1", 7300/10);
 
-    // Assume classic 0.1 selectivity for other operators
-    // IMPALA-7560 says this should be revised.
     verifyCardinality(
-        "SELECT id FROM functional.alltypes WHERE int_col != 1", 730);
+        "SELECT id FROM functional.alltypes WHERE int_col != 1", 6570);
 
     // IMPALA-7601 says the following should be revised.
     verifyCardinality(
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index 593ac72..a01b401 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -2086,7 +2086,7 @@ PLAN-ROOT SINK
 |
 05:SELECT
 |  predicates: id IS NULL, tinyint_col != 5
-|  row-size=9B cardinality=730
+|  row-size=9B cardinality=699
 |
 00:UNION
 |  row-size=9B cardinality=7.30K
@@ -2124,7 +2124,7 @@ PLAN-ROOT SINK
 |
 05:SELECT
 |  predicates: id IS NULL, tinyint_col != 5
-|  row-size=9B cardinality=730
+|  row-size=9B cardinality=699
 |
 00:UNION
 |  row-size=9B cardinality=7.30K
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
index cf4c316..20aee39 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
@@ -156,8 +156,6 @@ PLAN-ROOT SINK
 # Not-equal, card = 1 - 1/ndv
 # Use smaller table so effect is clear
 # |T|=8, NDV=8
-# Bug: IMPALA-8039
-# Bug, expected cardinality ~7
 select *
 from functional.alltypestiny
 where id != 10
@@ -167,7 +165,7 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional.alltypestiny]
    HDFS partitions=4/4 files=4 size=460B
    predicates: id != 10
-   row-size=89B cardinality=1
+   row-size=89B cardinality=7
 ====
 # Inequality. No useful stats.
 # Bug: IMPALA-8037, Assumes sel = 0.1
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
index 5a26b0f..f829753 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
@@ -411,10 +411,10 @@ PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters:
-  d:string_col NOT_EQUAL '2'
   d:string_col GREATER_OR_EQUAL '4'
   d:date_string_col EQUAL '04/03/09'
-   predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09'
+  d:string_col NOT_EQUAL '2'
+   predicates: string_col >= '4', date_string_col = '04/03/09', string_col != '2'
    row-size=89B cardinality=1
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
@@ -423,10 +423,10 @@ PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.alltypessmall]
    hbase filters:
-  d:string_col NOT_EQUAL '2'
   d:string_col GREATER_OR_EQUAL '4'
   d:date_string_col EQUAL '04/03/09'
-   predicates: string_col != '2', string_col >= '4', date_string_col = '04/03/09'
+  d:string_col NOT_EQUAL '2'
+   predicates: string_col >= '4', date_string_col = '04/03/09', string_col != '2'
    row-size=89B cardinality=1
 ====
 # mix of predicates and functional_hbase. filters
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
index 266d528..514bf0d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view-limit.test
@@ -708,7 +708,7 @@ PLAN-ROOT SINK
 |
 02:SCAN HDFS [functional.alltypessmall]
    partitions=4/4 files=4 size=6.32KB
-   predicates: functional.alltypessmall.id != 1, functional.alltypessmall.id > 10, functional.alltypessmall.id > 20, id != 2
+   predicates: functional.alltypessmall.id > 10, functional.alltypessmall.id > 20, functional.alltypessmall.id != 1, id != 2
    runtime filters: RF000 -> id
    row-size=4B cardinality=10
 ====
@@ -772,7 +772,7 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
-   predicates: functional.alltypes.id != 2, functional.alltypes.id > 10, functional.alltypes.id > 20, id != 1
+   predicates: functional.alltypes.id > 10, functional.alltypes.id > 20, functional.alltypes.id != 2, id != 1
    runtime filters: RF000 -> id
    row-size=4B cardinality=730
 ====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index 824188a..6bdcc41 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -143,7 +143,7 @@ and zip > 1 and zip < 50
 PLAN-ROOT SINK
 |
 00:SCAN KUDU [functional_kudu.testtbl]
-   kudu predicates: id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0, name = 'foo'
+   kudu predicates: name = 'foo', id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0
    row-size=28B cardinality=0
 ---- SCANRANGELOCATIONS
 NODE 0:
@@ -154,7 +154,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
-   kudu predicates: id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0, name = 'foo'
+   kudu predicates: name = 'foo', id <= 20, id >= 10, zip < 50, zip <= 30, zip <= 5, zip > 1, zip >= 0
    row-size=28B cardinality=0
 ====
 # Constant propagation works for Kudu
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
index b977b98..74ff2b4 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
@@ -585,7 +585,7 @@ PLAN-ROOT SINK
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2
 |     HDFS partitions=2/4 files=2 size=3.16KB
-|     predicates: functional.alltypessmall.int_col != 5, id > 11
+|     predicates: id > 11, functional.alltypessmall.int_col != 5
 |     row-size=16B cardinality=5
 |
 00:SCAN HDFS [functional.alltypes a]
@@ -629,7 +629,7 @@ PLAN-ROOT SINK
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2
 |     HDFS partitions=2/4 files=2 size=3.16KB
-|     predicates: functional.alltypessmall.int_col != 5, id > 11
+|     predicates:  id > 11, functional.alltypessmall.int_col != 5
 |     row-size=16B cardinality=5
 |
 00:SCAN HDFS [functional.alltypes a]
@@ -672,7 +672,7 @@ PLAN-ROOT SINK
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2
 |     HDFS partitions=2/4 files=2 size=3.16KB
-|     predicates: functional.alltypessmall.int_col != 5, id > 11
+|     predicates: id > 11, functional.alltypessmall.int_col != 5
 |     row-size=16B cardinality=5
 |
 00:SCAN HDFS [functional.alltypes a]
@@ -709,7 +709,7 @@ PLAN-ROOT SINK
 |  01:SCAN HDFS [functional.alltypessmall]
 |     partition predicates: functional.alltypessmall.year = 2009, functional.alltypessmall.month <= 2
 |     HDFS partitions=2/4 files=2 size=3.16KB
-|     predicates: functional.alltypessmall.int_col != 5, id > 11
+|     predicates: id > 11, functional.alltypessmall.int_col != 5
 |     row-size=16B cardinality=5
 |
 00:SCAN HDFS [functional.alltypes a]
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
index 917e5c1..e54f6a3 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
@@ -3453,20 +3453,20 @@ PLAN-ROOT SINK
 |
 07:SORT
 |  order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 06:AGGREGATE [FINALIZE]
 |  output: count(ps_suppkey)
 |  group by: p_brand, p_type, p_size
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 05:AGGREGATE
 |  group by: p_brand, p_type, p_size, ps_suppkey
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 04:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
 |  hash predicates: ps_suppkey = s_suppkey
-|  row-size=81B cardinality=31.92K
+|  row-size=81B cardinality=31.49K
 |
 |--02:SCAN HDFS [tpch.supplier]
 |     HDFS partitions=1/1 files=1 size=1.33MB
@@ -3476,12 +3476,12 @@ PLAN-ROOT SINK
 03:HASH JOIN [INNER JOIN]
 |  hash predicates: ps_partkey = p_partkey
 |  runtime filters: RF000 <- p_partkey
-|  row-size=81B cardinality=31.92K
+|  row-size=81B cardinality=31.49K
 |
 |--01:SCAN HDFS [tpch.part]
 |     HDFS partitions=1/1 files=1 size=22.83MB
-|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-|     row-size=65B cardinality=8.00K
+|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+|     row-size=65B cardinality=7.89K
 |
 00:SCAN HDFS [tpch.partsupp]
    HDFS partitions=1/1 files=1 size=112.71MB
@@ -3497,33 +3497,33 @@ PLAN-ROOT SINK
 |
 07:SORT
 |  order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 13:AGGREGATE [FINALIZE]
 |  output: count:merge(ps_suppkey)
 |  group by: p_brand, p_type, p_size
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 12:EXCHANGE [HASH(p_brand,p_type,p_size)]
 |
 06:AGGREGATE [STREAMING]
 |  output: count(ps_suppkey)
 |  group by: p_brand, p_type, p_size
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 11:AGGREGATE
 |  group by: p_brand, p_type, p_size, ps_suppkey
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 10:EXCHANGE [HASH(p_brand,p_type,p_size,ps_suppkey)]
 |
 05:AGGREGATE [STREAMING]
 |  group by: p_brand, p_type, p_size, ps_suppkey
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 04:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST]
 |  hash predicates: ps_suppkey = s_suppkey
-|  row-size=81B cardinality=31.92K
+|  row-size=81B cardinality=31.49K
 |
 |--09:EXCHANGE [BROADCAST]
 |  |
@@ -3535,14 +3535,14 @@ PLAN-ROOT SINK
 03:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: ps_partkey = p_partkey
 |  runtime filters: RF000 <- p_partkey
-|  row-size=81B cardinality=31.92K
+|  row-size=81B cardinality=31.49K
 |
 |--08:EXCHANGE [BROADCAST]
 |  |
 |  01:SCAN HDFS [tpch.part]
 |     HDFS partitions=1/1 files=1 size=22.83MB
-|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-|     row-size=65B cardinality=8.00K
+|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+|     row-size=65B cardinality=7.89K
 |
 00:SCAN HDFS [tpch.partsupp]
    HDFS partitions=1/1 files=1 size=112.71MB
@@ -3558,33 +3558,33 @@ PLAN-ROOT SINK
 |
 07:SORT
 |  order by: count(ps_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 13:AGGREGATE [FINALIZE]
 |  output: count:merge(ps_suppkey)
 |  group by: p_brand, p_type, p_size
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 12:EXCHANGE [HASH(p_brand,p_type,p_size)]
 |
 06:AGGREGATE [STREAMING]
 |  output: count(ps_suppkey)
 |  group by: p_brand, p_type, p_size
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 11:AGGREGATE
 |  group by: p_brand, p_type, p_size, ps_suppkey
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 10:EXCHANGE [HASH(p_brand,p_type,p_size,ps_suppkey)]
 |
 05:AGGREGATE [STREAMING]
 |  group by: p_brand, p_type, p_size, ps_suppkey
-|  row-size=65B cardinality=31.92K
+|  row-size=65B cardinality=31.49K
 |
 04:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST]
 |  hash predicates: ps_suppkey = s_suppkey
-|  row-size=81B cardinality=31.92K
+|  row-size=81B cardinality=31.49K
 |
 |--JOIN BUILD
 |  |  join-table-id=00 plan-id=01 cohort-id=01
@@ -3599,7 +3599,7 @@ PLAN-ROOT SINK
 |
 03:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: ps_partkey = p_partkey
-|  row-size=81B cardinality=31.92K
+|  row-size=81B cardinality=31.49K
 |
 |--JOIN BUILD
 |  |  join-table-id=01 plan-id=02 cohort-id=01
@@ -3610,8 +3610,8 @@ PLAN-ROOT SINK
 |  |
 |  01:SCAN HDFS [tpch.part]
 |     HDFS partitions=1/1 files=1 size=22.83MB
-|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-|     row-size=65B cardinality=8.00K
+|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+|     row-size=65B cardinality=7.89K
 |
 00:SCAN HDFS [tpch.partsupp]
    HDFS partitions=1/1 files=1 size=112.71MB
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
index b5f85ab..b851b4a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
@@ -1228,7 +1228,7 @@ PLAN-ROOT SINK
 |  row-size=89B cardinality=31.92K
 |
 |--01:SCAN KUDU [tpch_kudu.part]
-|     predicates: p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
+|     predicates: NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
 |     kudu predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9)
 |     row-size=73B cardinality=8.00K
 |
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
index 94759c8..c5a158d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
@@ -2307,8 +2307,8 @@ PLAN-ROOT SINK
 |
 |--05:SCAN HDFS [tpch_nested_parquet.part p]
 |     HDFS partitions=1/1 files=1 size=6.24MB
-|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-|     row-size=65B cardinality=8.00K
+|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+|     row-size=65B cardinality=7.89K
 |
 01:SUBPLAN
 |  row-size=103B cardinality=10.00K
@@ -2368,8 +2368,8 @@ PLAN-ROOT SINK
 |  |
 |  05:SCAN HDFS [tpch_nested_parquet.part p]
 |     HDFS partitions=1/1 files=1 size=6.24MB
-|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
-|     row-size=65B cardinality=8.00K
+|     predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT p_type LIKE 'MEDIUM POLISHED%', p_brand != 'Brand#45'
+|     row-size=65B cardinality=7.89K
 |
 01:SUBPLAN
 |  row-size=103B cardinality=10.00K
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
index adc56aa..ff599c0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
@@ -1222,7 +1222,7 @@ PLAN-ROOT SINK
 |
 |--01:SCAN HDFS [tpch.part]
 |     HDFS partitions=1/1 files=1 size=22.83MB
-|     predicates: tpch.part.p_size IN (49, 14, 23, 45, 19, 3, 36, 9), tpch.part.p_brand != 'Brand#45', NOT tpch.part.p_type LIKE 'MEDIUM POLISHED%'
+|     predicates: tpch.part.p_size IN (49, 14, 23, 45, 19, 3, 36, 9), NOT tpch.part.p_type LIKE 'MEDIUM POLISHED%', tpch.part.p_brand != 'Brand#45'
 |     row-size=65B cardinality=8.00K
 |
 00:SCAN HDFS [tpch.partsupp]