You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/28 00:02:34 UTC

[impala] branch master updated: IMPALA-12371: Add better cardinality estimation for Iceberg V2 tables with deletes

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 0063ccc83 IMPALA-12371: Add better cardinality estimation for Iceberg V2 tables with deletes
0063ccc83 is described below

commit 0063ccc83678816b1f1ba2b314b181fdd8af82e2
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Wed Sep 6 15:53:48 2023 +0200

    IMPALA-12371: Add better cardinality estimation for Iceberg V2 tables with deletes
    
    Currently IcebergDeleteNode's cardinality is the same as the LHS's
    cardinality, i.e. we don't take the RHS into account. The RHS contains
    the position delete records, so it is a fair assumption that every
    record on the RHS removes a record from the LHS (duplicated delete
    records should be extremely rare).
    
    If there are conjuncts on the Iceberg table we can assume that they have
    the same selectivity on the data records and on the delete records.
    
    With the above assumptions this change updates the cardinality of the
    IcebergDeleteNode with basically the following formula:
    
     Card(IcebergDeleteNode) = Card(LHS) - Selectivity(LHS) * Card(RHS);
    
    Please note that the left side's cardinality already takes the
    selectivity into account.
    
    To deal with edge cases when there are lots of duplicated delete
    records (shouldn't happen in normal usage), we return a cardinality
    of at least 1, so the actual formula is:
    
     Card(IcebergDeleteNode) =
       Max(
         1,
         Card(LHS) - Selectivity(LHS) * Card(RHS)
       );
    
    Testing:
     * updated the planner tests
    
    Change-Id: I988dc8d7e1074932c460b3702d3381341e5b23c5
    Reviewed-on: http://gerrit.cloudera.org:8080/20460
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../apache/impala/planner/IcebergDeleteNode.java   | 18 +++++
 .../queries/PlannerTest/iceberg-v2-delete.test     | 64 +++++++--------
 .../queries/PlannerTest/iceberg-v2-tables.test     | 94 +++++++++++-----------
 3 files changed, 97 insertions(+), 79 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java
index 09beeb9bd..11594d997 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergDeleteNode.java
@@ -85,6 +85,24 @@ public class IcebergDeleteNode extends JoinNode {
     computeStats(analyzer);
   }
 
+  @Override
+  public void computeStats(Analyzer analyzer) {
+    super.computeStats(analyzer);
+    // Assume each position delete record removes exactly one data record (duplicates
+    // should be extremely rare) and that the left side's selectivity also applies to the
+    // delete records. The left cardinality already includes that selectivity.
+    long leftCardWithSelectivity = getChild(0).cardinality_;
+    long rightCard = getChild(1).cardinality_;
+    // Both children must have positive cardinalities; report the value otherwise.
+    Preconditions.checkState(leftCardWithSelectivity > 0,
+        "Unexpected left cardinality: %s", leftCardWithSelectivity);
+    Preconditions.checkState(rightCard > 0, "Unexpected right cardinality: %s", rightCard);
+    double leftSelectivity = getChild(0).computeSelectivity();
+    long rightCardWithSelectivity = (long)(leftSelectivity * rightCard);
+    // Clamp to 1 in case duplicated delete records outnumber the matching data records.
+    cardinality_ = Math.max(1, leftCardWithSelectivity - rightCardWithSelectivity);
+  }
+
   @Override
   protected String debugString() {
     return MoreObjects.toStringHelper(this)
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test
index b54bd67e1..c0fcdc0b9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test
@@ -69,19 +69,19 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_delete_positional-POSITION-DE
 |
 08:SORT
 |  order by: input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=20B cardinality=3
+|  row-size=20B cardinality=2
 |
 07:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = min(id)
 |  runtime filters: RF000 <- min(id)
-|  row-size=28B cardinality=3
+|  row-size=28B cardinality=2
 |
 |--06:AGGREGATE [FINALIZE]
 |  |  output: min(id)
 |  |  row-size=8B cardinality=1
 |  |
 |  05:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  row-size=28B cardinality=3
+|  |  row-size=28B cardinality=2
 |  |
 |  |--04:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional-POSITION-DELETE-04 functional_parquet.iceberg_v2_delete_positional-position-delete]
 |  |     HDFS partitions=1/1 files=1 size=1.54KB
@@ -92,7 +92,7 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_delete_positional-POSITION-DE
 |     row-size=28B cardinality=3
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=28B cardinality=3
+|  row-size=28B cardinality=2
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional-POSITION-DELETE-01 functional_parquet.iceberg_v2_delete_positional-position-delete]
 |     HDFS partitions=1/1 files=1 size=1.54KB
@@ -107,12 +107,12 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_delete_positional-POSITION-DE
 |
 13:SORT
 |  order by: input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=20B cardinality=3
+|  row-size=20B cardinality=2
 |
 07:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
 |  hash predicates: id = min(id)
 |  runtime filters: RF000 <- min(id)
-|  row-size=28B cardinality=3
+|  row-size=28B cardinality=2
 |
 |--12:EXCHANGE [BROADCAST]
 |  |
@@ -127,7 +127,7 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_delete_positional-POSITION-DE
 |  |  row-size=8B cardinality=1
 |  |
 |  05:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  |  row-size=28B cardinality=3
+|  |  row-size=28B cardinality=2
 |  |
 |  |--09:EXCHANGE [BROADCAST]
 |  |  |
@@ -140,7 +140,7 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_delete_positional-POSITION-DE
 |     row-size=28B cardinality=3
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  row-size=28B cardinality=3
+|  row-size=28B cardinality=2
 |
 |--08:EXCHANGE [BROADCAST]
 |  |
@@ -199,10 +199,10 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 03:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=2
+|  row-size=36B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=40B cardinality=2
+|  row-size=40B cardinality=1
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=3 size=9.47KB
@@ -217,12 +217,12 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 06:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=2
+|  row-size=36B cardinality=1
 |
 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  row-size=40B cardinality=2
+|  row-size=40B cardinality=1
 |
 |--04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.file_path)]
 |  |
@@ -243,10 +243,10 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 03:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=6
+|  row-size=36B cardinality=3
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=36B cardinality=6
+|  row-size=36B cardinality=3
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=1 size=3.15KB
@@ -261,10 +261,10 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 04:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=6
+|  row-size=36B cardinality=3
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  row-size=36B cardinality=6
+|  row-size=36B cardinality=3
 |
 |--03:EXCHANGE [BROADCAST]
 |  |
@@ -283,10 +283,10 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 03:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=2
+|  row-size=36B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=48B cardinality=2
+|  row-size=48B cardinality=1
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=3 size=9.47KB
@@ -301,12 +301,12 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 06:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=2
+|  row-size=36B cardinality=1
 |
 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  row-size=48B cardinality=2
+|  row-size=48B cardinality=1
 |
 |--04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.file_path)]
 |  |
@@ -328,19 +328,19 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 08:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=20
+|  row-size=36B cardinality=10
 |
 07:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = max(id)
 |  runtime filters: RF000 <- max(id)
-|  row-size=40B cardinality=20
+|  row-size=40B cardinality=10
 |
 |--06:AGGREGATE [FINALIZE]
 |  |  output: max(id)
 |  |  row-size=8B cardinality=1
 |  |
 |  05:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  row-size=28B cardinality=3
+|  |  row-size=28B cardinality=2
 |  |
 |  |--04:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional-POSITION-DELETE-04 functional_parquet.iceberg_v2_delete_positional-position-delete]
 |  |     HDFS partitions=1/1 files=1 size=1.54KB
@@ -351,7 +351,7 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |     row-size=28B cardinality=3
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=40B cardinality=20
+|  row-size=40B cardinality=10
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=3 size=9.47KB
@@ -366,14 +366,14 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 15:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=20
+|  row-size=36B cardinality=10
 |
 14:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)]
 |
 07:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
 |  hash predicates: id = max(id)
 |  runtime filters: RF000 <- max(id)
-|  row-size=40B cardinality=20
+|  row-size=40B cardinality=10
 |
 |--13:EXCHANGE [BROADCAST]
 |  |
@@ -388,7 +388,7 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |  |  row-size=8B cardinality=1
 |  |
 |  05:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  |  row-size=28B cardinality=3
+|  |  row-size=28B cardinality=2
 |  |
 |  |--10:EXCHANGE [BROADCAST]
 |  |  |
@@ -401,7 +401,7 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |     row-size=28B cardinality=3
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  row-size=40B cardinality=20
+|  row-size=40B cardinality=10
 |
 |--09:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.file_path)]
 |  |
@@ -422,10 +422,10 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 03:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=2
+|  row-size=36B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=40B cardinality=2
+|  row-size=40B cardinality=1
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=3 size=9.47KB
@@ -440,12 +440,12 @@ DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-
 |
 06:SORT
 |  order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST, input__file__name ASC NULLS LAST, file__position ASC NULLS LAST
-|  row-size=36B cardinality=2
+|  row-size=36B cardinality=1
 |
 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  row-size=40B cardinality=2
+|  row-size=40B cardinality=1
 |
 |--04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.file_path)]
 |  |
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
index d825285f9..c0f31b79a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
@@ -51,7 +51,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=20B cardinality=3
+|  row-size=20B cardinality=2
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional-POSITION-DELETE-01 functional_parquet.iceberg_v2_delete_positional-position-delete]
 |     HDFS partitions=1/1 files=1 size=1.54KB
@@ -74,7 +74,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  row-size=20B cardinality=3
+|  row-size=20B cardinality=2
 |
 |--04:EXCHANGE [BROADCAST]
 |  |
@@ -91,7 +91,7 @@ SELECT * from iceberg_v2_delete_positional;
 PLAN-ROOT SINK
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=40B cardinality=3
+|  row-size=40B cardinality=2
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional-POSITION-DELETE-01 functional_parquet.iceberg_v2_delete_positional-position-delete]
 |     HDFS partitions=1/1 files=1 size=1.54KB
@@ -106,7 +106,7 @@ PLAN-ROOT SINK
 04:EXCHANGE [UNPARTITIONED]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  row-size=40B cardinality=3
+|  row-size=40B cardinality=2
 |
 |--03:EXCHANGE [BROADCAST]
 |  |
@@ -123,7 +123,7 @@ SELECT * from iceberg_v2_positional_delete_all_rows;
 PLAN-ROOT SINK
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=36B cardinality=3
+|  row-size=36B cardinality=1
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_delete_all_rows-position-delete]
 |     HDFS partitions=1/1 files=1 size=2.60KB
@@ -138,7 +138,7 @@ PLAN-ROOT SINK
 04:EXCHANGE [UNPARTITIONED]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  row-size=36B cardinality=3
+|  row-size=36B cardinality=1
 |
 |--03:EXCHANGE [BROADCAST]
 |  |
@@ -214,7 +214,7 @@ PLAN-ROOT SINK
 |  row-size=36B cardinality=1
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  row-size=36B cardinality=6
+|  |  row-size=36B cardinality=2
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -239,7 +239,7 @@ PLAN-ROOT SINK
 |  row-size=36B cardinality=1
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  |  row-size=36B cardinality=6
+|  |  row-size=36B cardinality=2
 |  |
 |  |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
@@ -263,10 +263,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=10
+|  row-size=36B cardinality=6
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  row-size=36B cardinality=6
+|  |  row-size=36B cardinality=2
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -286,10 +286,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=10
+|  row-size=36B cardinality=6
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  |  row-size=36B cardinality=6
+|  |  row-size=36B cardinality=2
 |  |
 |  |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
@@ -330,7 +330,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=20B cardinality=3
+|  row-size=20B cardinality=2
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |     HDFS partitions=1/1 files=1 size=2.63KB
@@ -353,7 +353,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  row-size=20B cardinality=3
+|  row-size=20B cardinality=2
 |
 |--04:EXCHANGE [BROADCAST]
 |  |
@@ -374,7 +374,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=20B cardinality=6
+|  row-size=20B cardinality=2
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -397,7 +397,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  row-size=20B cardinality=6
+|  row-size=20B cardinality=2
 |
 |--05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |
@@ -417,10 +417,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=6
+|  row-size=36B cardinality=4
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  row-size=36B cardinality=3
+|  |  row-size=36B cardinality=1
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
 |  |     HDFS partitions=1/1 files=1 size=2.60KB
@@ -440,10 +440,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=6
+|  row-size=36B cardinality=4
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  |  row-size=36B cardinality=3
+|  |  row-size=36B cardinality=1
 |  |
 |  |--05:EXCHANGE [BROADCAST]
 |  |  |
@@ -464,7 +464,7 @@ SELECT * from iceberg_v2_partitioned_position_deletes
 PLAN-ROOT SINK
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=64B cardinality=20
+|  row-size=64B cardinality=10
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=3 size=9.47KB
@@ -479,7 +479,7 @@ PLAN-ROOT SINK
 05:EXCHANGE [UNPARTITIONED]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  row-size=64B cardinality=20
+|  row-size=64B cardinality=10
 |
 |--04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.file_path)]
 |  |
@@ -556,7 +556,7 @@ PLAN-ROOT SINK
 |
 07:HASH JOIN [LEFT ANTI JOIN]
 |  hash predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=10
+|  row-size=16B cardinality=6
 |
 |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=1 size=625B
@@ -564,14 +564,14 @@ PLAN-ROOT SINK
 |
 05:AGGREGATE [FINALIZE]
 |  group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=10
+|  row-size=16B cardinality=6
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=10
+|  row-size=36B cardinality=6
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  row-size=36B cardinality=6
+|  |  row-size=36B cardinality=2
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -591,7 +591,7 @@ PLAN-ROOT SINK
 |
 07:HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
 |  hash predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=10
+|  row-size=16B cardinality=6
 |
 |--12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
 |  |
@@ -601,20 +601,20 @@ PLAN-ROOT SINK
 |
 11:AGGREGATE [FINALIZE]
 |  group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=10
+|  row-size=16B cardinality=6
 |
 10:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
 |
 05:AGGREGATE [STREAMING]
 |  group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=10
+|  row-size=16B cardinality=6
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=10
+|  row-size=36B cardinality=6
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  |  row-size=36B cardinality=6
+|  |  row-size=36B cardinality=2
 |  |
 |  |--09:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
@@ -696,7 +696,7 @@ PLAN-ROOT SINK
 11:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: i = max(i)
 |  runtime filters: RF000 <- max(i)
-|  row-size=36B cardinality=10
+|  row-size=36B cardinality=6
 |
 |--10:AGGREGATE [FINALIZE]
 |  |  output: max(i)
@@ -704,10 +704,10 @@ PLAN-ROOT SINK
 |  |
 |  09:UNION
 |  |  pass-through-operands: all
-|  |  row-size=24B cardinality=6
+|  |  row-size=24B cardinality=4
 |  |
 |  |--07:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  |  row-size=24B cardinality=3
+|  |  |  row-size=24B cardinality=1
 |  |  |
 |  |  |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
 |  |  |     HDFS partitions=1/1 files=1 size=2.60KB
@@ -723,10 +723,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=10
+|  row-size=36B cardinality=6
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  row-size=36B cardinality=6
+|  |  row-size=36B cardinality=2
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -749,7 +749,7 @@ PLAN-ROOT SINK
 11:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
 |  hash predicates: i = max(i)
 |  runtime filters: RF000 <- max(i)
-|  row-size=36B cardinality=10
+|  row-size=36B cardinality=6
 |
 |--17:EXCHANGE [BROADCAST]
 |  |
@@ -765,10 +765,10 @@ PLAN-ROOT SINK
 |  |
 |  09:UNION
 |  |  pass-through-operands: all
-|  |  row-size=24B cardinality=6
+|  |  row-size=24B cardinality=4
 |  |
 |  |--07:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  |  |  row-size=24B cardinality=3
+|  |  |  row-size=24B cardinality=1
 |  |  |
 |  |  |--14:EXCHANGE [BROADCAST]
 |  |  |  |
@@ -786,10 +786,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=10
+|  row-size=36B cardinality=6
 |
 |--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  |  row-size=36B cardinality=6
+|  |  row-size=36B cardinality=2
 |  |
 |  |--13:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
@@ -1011,7 +1011,7 @@ select * from iceberg_v2_partitioned_position_deletes where action = 'download';
 PLAN-ROOT SINK
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=64B cardinality=6
+|  row-size=64B cardinality=4
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=1 size=3.18KB
@@ -1027,7 +1027,7 @@ PLAN-ROOT SINK
 04:EXCHANGE [UNPARTITIONED]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, BROADCAST]
-|  row-size=64B cardinality=6
+|  row-size=64B cardinality=4
 |
 |--03:EXCHANGE [BROADCAST]
 |  |
@@ -1054,7 +1054,7 @@ PLAN-ROOT SINK
 |  |  row-size=8B cardinality=1
 |  |
 |  07:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  |  row-size=20B cardinality=6
+|  |  row-size=20B cardinality=2
 |  |
 |  |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -1076,7 +1076,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
-|  row-size=20B cardinality=6
+|  row-size=20B cardinality=2
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -1104,7 +1104,7 @@ PLAN-ROOT SINK
 |  |  row-size=8B cardinality=1
 |  |
 |  07:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  |  row-size=20B cardinality=6
+|  |  row-size=20B cardinality=2
 |  |
 |  |--17:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
@@ -1138,7 +1138,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, PARTITIONED]
-|  row-size=20B cardinality=6
+|  row-size=20B cardinality=2
 |
 |--12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |