Posted to commits@impala.apache.org by mi...@apache.org on 2022/11/07 21:27:18 UTC

[impala] 01/02: IMPALA-11681: Set table stats for the Iceberg table by its partition stats

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f3504566fb97719eec81771a61785cedc85ba6fa
Author: LPL <li...@sensorsdata.cn>
AuthorDate: Tue Oct 25 12:22:12 2022 +0800

    IMPALA-11681: Set table stats for the Iceberg table by its partition stats
    
    For Iceberg tables, table-level statistics such as numRows can be
    computed from the Iceberg partition stats, which are more accurate and
    up to date. Obtaining these statistics is independent of
    StatsSetupConst.ROW_COUNT and StatsSetupConst.TOTAL_SIZE in HMS, and it
    improves cardinality estimation for Iceberg tables. The calculation is
    not yet accurate for V2 Iceberg tables; once IMPALA-11516 (Return better
    partition stats for V2 tables) is ready, these statistics could replace
    the HMS statistics.
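    
    A minimal sketch of the aggregation described above (the class and field
    names here are hypothetical; the real helpers are Utils.calculateNumRows()
    and Utils.calculateFileSizeInBytes() in FeIcebergTable below):
    
        import java.util.List;
        
        final class IcebergStatsSketch {
          // Hypothetical stand-in for the fields read from TIcebergPartitionStats.
          static final class PartitionStats {
            final long numRows;
            final long fileSizeInBytes;
            PartitionStats(long numRows, long fileSizeInBytes) {
              this.numRows = numRows;
              this.fileSizeInBytes = fileSizeInBytes;
            }
          }
        
          // numRows: summed over all DataFiles; DeleteFiles are not counted.
          static long numRows(List<PartitionStats> partitionStats) {
            return partitionStats.stream().mapToLong(s -> s.numRows).sum();
          }
        
          // Total size: summed over all ContentFiles (data and delete files).
          static long fileSizeInBytes(List<PartitionStats> partitionStats) {
            return partitionStats.stream().mapToLong(s -> s.fileSizeInBytes).sum();
          }
        }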
    
    Testing:
     - Existing tests
     - Tested in 'On-demand Metadata' mode
     - For 'select * from
     iceberg_v2_positional_not_all_data_files_have_delete_files where i =
     (select max(i) from iceberg_v2_positional_update_all_rows)', the 'Join
     Order' and 'Distribution Mode' are the same as when table stats are
     present
    
    Change-Id: I3e92d3f25e2a57a64556249410d0af3522598c00
    Reviewed-on: http://gerrit.cloudera.org:8080/19168
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 common/fbs/CatalogObjects.fbs                      |   2 +-
 common/protobuf/planner.proto                      |   2 +-
 common/thrift/PlanNodes.thrift                     |   2 +-
 .../org/apache/impala/catalog/FeIcebergTable.java  |  48 +++-
 .../impala/catalog/IcebergContentFileStore.java    |   2 +
 .../org/apache/impala/catalog/IcebergTable.java    |  11 +-
 .../impala/catalog/local/LocalIcebergTable.java    |   1 +
 .../queries/PlannerTest/iceberg-v2-tables.test     | 250 +++++++++++----------
 .../queries/PlannerTest/tablesample.test           |  36 +--
 9 files changed, 198 insertions(+), 156 deletions(-)

diff --git a/common/fbs/CatalogObjects.fbs b/common/fbs/CatalogObjects.fbs
index 973007d2c..8ecfb2f11 100644
--- a/common/fbs/CatalogObjects.fbs
+++ b/common/fbs/CatalogObjects.fbs
@@ -80,7 +80,7 @@ table FbFileDesc {
   // Whether this file is erasure-coded
   is_ec: bool = false (id: 5);
 
-  // The absolute path of the file, it`s used only when data files are outside of
+  // The absolute path of the file, it's used only when data files are outside of
   // the Iceberg table location (IMPALA-11507).
   absolute_path: string (id: 6);
 }
diff --git a/common/protobuf/planner.proto b/common/protobuf/planner.proto
index 208ebecd4..4e7c8ac63 100644
--- a/common/protobuf/planner.proto
+++ b/common/protobuf/planner.proto
@@ -55,7 +55,7 @@ message HdfsFileSplitPB {
   // any consistent hash.
   optional int32 partition_path_hash = 9;
 
-  // The absolute path of the file, it`s used only when data files are outside of
+  // The absolute path of the file, it's used only when data files are outside of
   // the Iceberg table location (IMPALA-11507).
   optional string absolute_path = 10;
 }
diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift
index 70e208637..46e8a5a30 100644
--- a/common/thrift/PlanNodes.thrift
+++ b/common/thrift/PlanNodes.thrift
@@ -224,7 +224,7 @@ struct THdfsFileSplit {
   // any consistent hash.
   9: required i32 partition_path_hash
 
-  // The absolute path of the file, it`s used only when data files are outside of
+  // The absolute path of the file, it's used only when data files are outside of
   // the Iceberg table location (IMPALA-11507).
   10: optional string absolute_path
 }
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
index 0ed334a31..eaf4d24a2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
@@ -311,6 +311,22 @@ public interface FeIcebergTable extends FeFsTable {
     return -1;
   }
 
+  /**
+   * Sets 'tableStats_' for the Iceberg table by its partition stats.
+   * TODO: The calculation for V2 Iceberg tables is not yet accurate. After
+   * IMPALA-11516 (Return better partition stats for V2 tables) is ready, this method
+   * can be considered to replace
+   * {@link Table#setTableStats(org.apache.hadoop.hive.metastore.api.Table)}.
+   */
+  default void setIcebergTableStats() {
+    Preconditions.checkState(getTTableStats() != null);
+    Preconditions.checkState(getIcebergPartitionStats() != null);
+    if (getTTableStats().getNum_rows() < 0) {
+      getTTableStats().setNum_rows(Utils.calculateNumRows(this));
+    }
+    getTTableStats().setTotal_file_bytes(Utils.calculateFileSizeInBytes(this));
+  }
+
   /**
    * Utility functions
    */
@@ -397,17 +413,9 @@ public interface FeIcebergTable extends FeFsTable {
       result.setSchema(resultSchema);
 
       TResultRowBuilder rowBuilder = new TResultRowBuilder();
-      Map<String, TIcebergPartitionStats> nameToStats = table.getIcebergPartitionStats();
-      if (table.getNumRows() >= 0) {
-        rowBuilder.add(table.getNumRows());
-      } else {
-        rowBuilder.add(nameToStats.values().stream().mapToLong(
-            TIcebergPartitionStats::getNum_rows).sum());
-      }
-      rowBuilder.add(nameToStats.values().stream().mapToLong(
-          TIcebergPartitionStats::getNum_files).sum());
-      rowBuilder.addBytes(nameToStats.values().stream().mapToLong(
-          TIcebergPartitionStats::getFile_size_in_bytes).sum());
+      rowBuilder.add(table.getNumRows());
+      rowBuilder.add(table.getContentFileStore().getNumFiles());
+      rowBuilder.addBytes(table.getTTableStats().getTotal_file_bytes());
       if (!table.isMarkedCached()) {
         rowBuilder.add("NOT CACHED");
         rowBuilder.add("NOT CACHED");
@@ -436,6 +444,24 @@ public interface FeIcebergTable extends FeFsTable {
       return result;
     }
 
+    /**
+     * Calculate num rows for the given Iceberg table from its partition stats.
+     * The result is computed from all DataFiles, excluding DeleteFiles.
+     */
+    public static long calculateNumRows(FeIcebergTable table) {
+      return table.getIcebergPartitionStats().values().stream()
+          .mapToLong(TIcebergPartitionStats::getNum_rows).sum();
+    }
+
+    /**
+     * Calculate file size in bytes for the given Iceberg table from its partition stats.
+     * The result is computed from all ContentFiles, including DataFiles and DeleteFiles.
+     */
+    public static long calculateFileSizeInBytes(FeIcebergTable table) {
+      return table.getIcebergPartitionStats().values().stream()
+          .mapToLong(TIcebergPartitionStats::getFile_size_in_bytes).sum();
+    }
+
     /**
      * Get the field schema list of the current PartitionSpec from Iceberg table.
      *
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
index 5e84f3227..ee7cc0974 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
@@ -97,6 +97,8 @@ public class IcebergContentFileStore {
 
   public List<FileDescriptor> getDeleteFiles() { return deleteFiles_; }
 
+  public long getNumFiles() { return dataFiles_.size() + deleteFiles_.size(); }
+
   public Iterable<FileDescriptor> getAllFiles() {
     return Iterables.concat(dataFiles_, deleteFiles_);
   }
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
index 2f1a81174..6d239e8a5 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
@@ -17,16 +17,18 @@
 
 package org.apache.impala.catalog;
 
+import com.codahale.metrics.Timer;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-
 import org.apache.hadoop.hive.common.StatsSetupConst;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.impala.analysis.IcebergPartitionField;
 import org.apache.impala.analysis.IcebergPartitionSpec;
 import org.apache.impala.analysis.IcebergPartitionTransform;
@@ -51,10 +53,6 @@ import org.apache.impala.util.IcebergSchemaConverter;
 import org.apache.impala.util.IcebergUtil;
 import org.apache.thrift.TException;
 
-import com.codahale.metrics.Timer;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-
 /**
  * Representation of an Iceberg table in the catalog cache.
  */
@@ -359,6 +357,7 @@ public class IcebergTable extends Table implements FeIcebergTable {
             .load(false, msClient, msTable_, true, true, false, null, null,null, reason);
         fileStore_ = Utils.loadAllPartition(this);
         partitionStats_ = Utils.loadPartitionStats(this);
+        setIcebergTableStats();
         loadAllColumnStats(msClient);
       } catch (Exception e) {
         throw new IcebergTableLoadingException("Error loading metadata for Iceberg table "
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
index dc1c11ef5..1813ce00e 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
@@ -127,6 +127,7 @@ public class LocalIcebergTable extends LocalTable implements FeIcebergTable {
     icebergParquetPlainPageSize_ = Utils.getIcebergParquetPlainPageSize(msTable);
     icebergParquetDictPageSize_ = Utils.getIcebergParquetDictPageSize(msTable);
     partitionStats_ = tableInfo.getIceberg_table().getPartition_stats();
+    setIcebergTableStats();
     addVirtualColumns(ref.getVirtualColumns());
   }
 
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
index 621fd7e4a..d9db7f81f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
@@ -23,7 +23,7 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional_parquet.iceberg_v2_no_deletes]
    HDFS partitions=1/1 files=1 size=625B
    predicates: i > 1
-   row-size=4B cardinality=400
+   row-size=4B cardinality=1
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -40,7 +40,7 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional_parquet.iceberg_v2_no_deletes]
    HDFS partitions=1/1 files=1 size=625B
    predicates: i > 1
-   row-size=4B cardinality=400
+   row-size=4B cardinality=1
 ====
 SELECT count(*) from iceberg_v2_delete_positional;
 ---- PLAN
@@ -59,7 +59,7 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional]
    HDFS partitions=1/1 files=1 size=662B
-   row-size=20B cardinality=4.73K
+   row-size=20B cardinality=3
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -74,7 +74,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=1
 |
 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  row-size=20B cardinality=4.73K
+|  row-size=20B cardinality=3
 |
 |--04:EXCHANGE [BROADCAST]
 |  |
@@ -84,14 +84,14 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional]
    HDFS partitions=1/1 files=1 size=662B
-   row-size=20B cardinality=4.73K
+   row-size=20B cardinality=3
 ====
 SELECT * from iceberg_v2_delete_positional;
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  row-size=40B cardinality=4.73K
+|  row-size=40B cardinality=3
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional-POSITION-DELETE-01 functional_parquet.iceberg_v2_delete_positional-position-delete]
 |     HDFS partitions=1/1 files=1 size=1.54KB
@@ -99,14 +99,14 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional]
    HDFS partitions=1/1 files=1 size=662B
-   row-size=40B cardinality=4.73K
+   row-size=40B cardinality=3
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 04:EXCHANGE [UNPARTITIONED]
 |
 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  row-size=40B cardinality=4.73K
+|  row-size=40B cardinality=3
 |
 |--03:EXCHANGE [BROADCAST]
 |  |
@@ -116,14 +116,14 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional]
    HDFS partitions=1/1 files=1 size=662B
-   row-size=40B cardinality=4.73K
+   row-size=40B cardinality=3
 ====
 SELECT * from iceberg_v2_positional_delete_all_rows;
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  row-size=36B cardinality=8.93K
+|  row-size=36B cardinality=3
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_delete_all_rows-position-delete]
 |     HDFS partitions=1/1 files=1 size=2.60KB
@@ -131,14 +131,14 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows]
    HDFS partitions=1/1 files=1 size=625B
-   row-size=36B cardinality=8.93K
+   row-size=36B cardinality=3
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 04:EXCHANGE [UNPARTITIONED]
 |
 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  row-size=36B cardinality=8.93K
+|  row-size=36B cardinality=3
 |
 |--03:EXCHANGE [BROADCAST]
 |  |
@@ -148,7 +148,7 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows]
    HDFS partitions=1/1 files=1 size=625B
-   row-size=36B cardinality=8.93K
+   row-size=36B cardinality=3
 ====
 SELECT * from iceberg_v2_no_deletes limit 1
 ---- PLAN
@@ -183,7 +183,7 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows]
    HDFS partitions=1/1 files=1 size=625B
-   row-size=36B cardinality=8.93K
+   row-size=36B cardinality=3
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -202,7 +202,7 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows]
    HDFS partitions=1/1 files=1 size=625B
-   row-size=36B cardinality=8.93K
+   row-size=36B cardinality=3
 ====
 SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files limit 1
 ---- PLAN
@@ -214,7 +214,7 @@ PLAN-ROOT SINK
 |  row-size=36B cardinality=1
 |
 |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  |  row-size=36B cardinality=34.20K
+|  |  row-size=36B cardinality=10
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -222,15 +222,15 @@ PLAN-ROOT SINK
 |  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
-|     row-size=36B cardinality=34.20K
+|     row-size=36B cardinality=10
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
-   row-size=36B cardinality=34.20K
+   row-size=36B cardinality=1
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
-06:EXCHANGE [UNPARTITIONED]
+07:EXCHANGE [UNPARTITIONED]
 |  limit: 1
 |
 04:UNION
@@ -238,22 +238,24 @@ PLAN-ROOT SINK
 |  limit: 1
 |  row-size=36B cardinality=1
 |
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  |  row-size=36B cardinality=34.20K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  |  row-size=36B cardinality=10
 |  |
-|  |--05:EXCHANGE [BROADCAST]
+|  |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
 |  |  01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
 |  |     row-size=245B cardinality=4
 |  |
+|  05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+|  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
-|     row-size=36B cardinality=34.20K
+|     row-size=36B cardinality=10
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
-   row-size=36B cardinality=34.20K
+   row-size=36B cardinality=10
 ====
 SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files
 ---- PLAN
@@ -261,10 +263,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=68.39K
+|  row-size=36B cardinality=20
 |
 |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  |  row-size=36B cardinality=34.20K
+|  |  row-size=36B cardinality=10
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -272,36 +274,38 @@ PLAN-ROOT SINK
 |  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
-|     row-size=36B cardinality=34.20K
+|     row-size=36B cardinality=10
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
-   row-size=36B cardinality=34.20K
+   row-size=36B cardinality=10
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
-06:EXCHANGE [UNPARTITIONED]
+07:EXCHANGE [UNPARTITIONED]
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=68.39K
+|  row-size=36B cardinality=20
 |
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  |  row-size=36B cardinality=34.20K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  |  row-size=36B cardinality=10
 |  |
-|  |--05:EXCHANGE [BROADCAST]
+|  |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
 |  |  01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
 |  |     row-size=245B cardinality=4
 |  |
+|  05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+|  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
-|     row-size=36B cardinality=34.20K
+|     row-size=36B cardinality=10
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
-   row-size=36B cardinality=34.20K
+   row-size=36B cardinality=10
 ====
 SELECT * from iceberg_v2_positional_update_all_rows
 ---- PLAN
@@ -309,10 +313,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=27.47K
+|  row-size=36B cardinality=12
 |
 |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  |  row-size=36B cardinality=13.74K
+|  |  row-size=36B cardinality=6
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
 |  |     HDFS partitions=1/1 files=1 size=2.60KB
@@ -320,11 +324,11 @@ PLAN-ROOT SINK
 |  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
 |     HDFS partitions=1/1 files=1 size=625B
-|     row-size=36B cardinality=13.74K
+|     row-size=36B cardinality=6
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
    HDFS partitions=1/1 files=1 size=625B
-   row-size=36B cardinality=13.74K
+   row-size=36B cardinality=6
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
@@ -332,10 +336,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=27.47K
+|  row-size=36B cardinality=12
 |
 |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  |  row-size=36B cardinality=13.74K
+|  |  row-size=36B cardinality=6
 |  |
 |  |--05:EXCHANGE [BROADCAST]
 |  |  |
@@ -345,18 +349,18 @@ PLAN-ROOT SINK
 |  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
 |     HDFS partitions=1/1 files=1 size=625B
-|     row-size=36B cardinality=13.74K
+|     row-size=36B cardinality=6
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
    HDFS partitions=1/1 files=1 size=625B
-   row-size=36B cardinality=13.74K
+   row-size=36B cardinality=6
 ====
 SELECT * from iceberg_v2_partitioned_position_deletes
 ---- PLAN
 PLAN-ROOT SINK
 |
 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  row-size=64B cardinality=4.96K
+|  row-size=64B cardinality=20
 |
 |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=3 size=9.47KB
@@ -364,24 +368,26 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes]
    HDFS partitions=1/1 files=3 size=3.48KB
-   row-size=64B cardinality=4.96K
+   row-size=64B cardinality=20
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
-04:EXCHANGE [UNPARTITIONED]
+05:EXCHANGE [UNPARTITIONED]
 |
-02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  row-size=64B cardinality=4.96K
+02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  row-size=64B cardinality=20
 |
-|--03:EXCHANGE [BROADCAST]
+|--04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.pos,functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.file_path)]
 |  |
 |  01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
 |     HDFS partitions=1/1 files=3 size=9.47KB
 |     row-size=182B cardinality=10
 |
+03:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.file__position,functional_parquet.iceberg_v2_partitioned_position_deletes.input__file__name)]
+|
 00:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes]
    HDFS partitions=1/1 files=3 size=3.48KB
-   row-size=64B cardinality=4.96K
+   row-size=64B cardinality=20
 ====
 SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files
 WHERE i > 2
@@ -390,10 +396,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=6.84K
+|  row-size=36B cardinality=2
 |
 |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  |  row-size=36B cardinality=3.42K
+|  |  row-size=36B cardinality=1
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -402,39 +408,41 @@ PLAN-ROOT SINK
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
 |     predicates: i > 2
-|     row-size=36B cardinality=3.42K
+|     row-size=36B cardinality=1
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
    predicates: i > 2
-   row-size=36B cardinality=3.42K
+   row-size=36B cardinality=1
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
-06:EXCHANGE [UNPARTITIONED]
+07:EXCHANGE [UNPARTITIONED]
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=6.84K
+|  row-size=36B cardinality=2
 |
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  |  row-size=36B cardinality=3.42K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  |  row-size=36B cardinality=1
 |  |
-|  |--05:EXCHANGE [BROADCAST]
+|  |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
 |  |  01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
 |  |     row-size=245B cardinality=4
 |  |
+|  05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+|  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
 |     predicates: i > 2
-|     row-size=36B cardinality=3.42K
+|     row-size=36B cardinality=1
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
    predicates: i > 2
-   row-size=36B cardinality=3.42K
+   row-size=36B cardinality=1
 ====
 select * from iceberg_v2_positional_not_all_data_files_have_delete_files for system_version as of 1497619269847778439
 minus
@@ -444,22 +452,22 @@ PLAN-ROOT SINK
 |
 07:HASH JOIN [LEFT ANTI JOIN]
 |  hash predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=68.39K
+|  row-size=16B cardinality=20
 |
 |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=1 size=625B
-|     row-size=16B cardinality=34.20K
+|     row-size=16B cardinality=10
 |
 05:AGGREGATE [FINALIZE]
 |  group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=68.39K
+|  row-size=16B cardinality=20
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=68.39K
+|  row-size=36B cardinality=20
 |
 |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  |  row-size=36B cardinality=34.20K
+|  |  row-size=36B cardinality=10
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -467,56 +475,58 @@ PLAN-ROOT SINK
 |  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
-|     row-size=36B cardinality=34.20K
+|     row-size=36B cardinality=10
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
-   row-size=36B cardinality=34.20K
+   row-size=36B cardinality=10
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
-12:EXCHANGE [UNPARTITIONED]
+13:EXCHANGE [UNPARTITIONED]
 |
 07:HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
 |  hash predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=68.39K
+|  row-size=16B cardinality=20
 |
-|--11:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
+|--12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
 |  |
 |  06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=1 size=625B
-|     row-size=16B cardinality=34.20K
+|     row-size=16B cardinality=10
 |
-10:AGGREGATE [FINALIZE]
+11:AGGREGATE [FINALIZE]
 |  group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=68.39K
+|  row-size=16B cardinality=20
 |
-09:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
+10:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
 |
 05:AGGREGATE [STREAMING]
 |  group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-|  row-size=16B cardinality=68.39K
+|  row-size=16B cardinality=20
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=68.39K
+|  row-size=36B cardinality=20
 |
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  |  row-size=36B cardinality=34.20K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  |  row-size=36B cardinality=10
 |  |
-|  |--08:EXCHANGE [BROADCAST]
+|  |--09:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
 |  |  01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
 |  |     row-size=245B cardinality=4
 |  |
+|  08:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+|  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
-|     row-size=36B cardinality=34.20K
+|     row-size=36B cardinality=10
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
-   row-size=36B cardinality=34.20K
+   row-size=36B cardinality=10
 ====
 with v as (select i + 1000 as ii, upper(s) as ss from iceberg_v2_positional_not_all_data_files_have_delete_files)
 select * from v where ii > 1003;
@@ -525,10 +535,10 @@ PLAN-ROOT SINK
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=6.84K
+|  row-size=36B cardinality=2
 |
 |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  |  row-size=36B cardinality=3.42K
+|  |  row-size=36B cardinality=1
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -537,39 +547,41 @@ PLAN-ROOT SINK
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
 |     predicates: i + 1000 > 1003
-|     row-size=36B cardinality=3.42K
+|     row-size=36B cardinality=1
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
    predicates: i + 1000 > 1003
-   row-size=36B cardinality=3.42K
+   row-size=36B cardinality=1
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
-06:EXCHANGE [UNPARTITIONED]
+07:EXCHANGE [UNPARTITIONED]
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=6.84K
+|  row-size=36B cardinality=2
 |
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  |  row-size=36B cardinality=3.42K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  |  row-size=36B cardinality=1
 |  |
-|  |--05:EXCHANGE [BROADCAST]
+|  |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
 |  |  01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
 |  |     row-size=245B cardinality=4
 |  |
+|  05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+|  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
 |     predicates: i + 1000 > 1003
-|     row-size=36B cardinality=3.42K
+|     row-size=36B cardinality=1
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
    predicates: i + 1000 > 1003
-   row-size=36B cardinality=3.42K
+   row-size=36B cardinality=1
 ====
 select *
 from iceberg_v2_positional_not_all_data_files_have_delete_files
@@ -580,7 +592,7 @@ PLAN-ROOT SINK
 11:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: i = max(i)
 |  runtime filters: RF000 <- max(i)
-|  row-size=36B cardinality=68.39K
+|  row-size=36B cardinality=20
 |
 |--10:AGGREGATE [FINALIZE]
 |  |  output: max(i)
@@ -588,10 +600,10 @@ PLAN-ROOT SINK
 |  |
 |  09:UNION
 |  |  pass-through-operands: all
-|  |  row-size=24B cardinality=27.47K
+|  |  row-size=24B cardinality=12
 |  |
 |  |--07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  |  |  row-size=24B cardinality=13.74K
+|  |  |  row-size=24B cardinality=6
 |  |  |
 |  |  |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
 |  |  |     HDFS partitions=1/1 files=1 size=2.60KB
@@ -599,18 +611,18 @@ PLAN-ROOT SINK
 |  |  |
 |  |  05:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
 |  |     HDFS partitions=1/1 files=1 size=625B
-|  |     row-size=24B cardinality=13.74K
+|  |     row-size=24B cardinality=6
 |  |
 |  08:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
 |     HDFS partitions=1/1 files=1 size=625B
-|     row-size=24B cardinality=13.74K
+|     row-size=24B cardinality=6
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=68.39K
+|  row-size=36B cardinality=20
 |
 |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-|  |  row-size=36B cardinality=34.20K
+|  |  row-size=36B cardinality=10
 |  |
 |  |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
@@ -619,29 +631,29 @@ PLAN-ROOT SINK
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
 |     runtime filters: RF000 -> i
-|     row-size=36B cardinality=34.20K
+|     row-size=36B cardinality=10
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
    runtime filters: RF000 -> i
-   row-size=36B cardinality=34.20K
+   row-size=36B cardinality=10
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
-17:EXCHANGE [UNPARTITIONED]
+18:EXCHANGE [UNPARTITIONED]
 |
 11:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
 |  hash predicates: i = max(i)
 |  runtime filters: RF000 <- max(i)
-|  row-size=36B cardinality=68.39K
+|  row-size=36B cardinality=20
 |
-|--16:EXCHANGE [BROADCAST]
+|--17:EXCHANGE [BROADCAST]
 |  |
-|  15:AGGREGATE [FINALIZE]
+|  16:AGGREGATE [FINALIZE]
 |  |  output: max:merge(i)
 |  |  row-size=4B cardinality=1
 |  |
-|  14:EXCHANGE [UNPARTITIONED]
+|  15:EXCHANGE [UNPARTITIONED]
 |  |
 |  10:AGGREGATE
 |  |  output: max(i)
@@ -649,12 +661,12 @@ PLAN-ROOT SINK
 |  |
 |  09:UNION
 |  |  pass-through-operands: all
-|  |  row-size=24B cardinality=27.47K
+|  |  row-size=24B cardinality=12
 |  |
 |  |--07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  |  |  row-size=24B cardinality=13.74K
+|  |  |  row-size=24B cardinality=6
 |  |  |
-|  |  |--13:EXCHANGE [BROADCAST]
+|  |  |--14:EXCHANGE [BROADCAST]
 |  |  |  |
 |  |  |  06:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
 |  |  |     HDFS partitions=1/1 files=1 size=2.60KB
@@ -662,32 +674,34 @@ PLAN-ROOT SINK
 |  |  |
 |  |  05:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
 |  |     HDFS partitions=1/1 files=1 size=625B
-|  |     row-size=24B cardinality=13.74K
+|  |     row-size=24B cardinality=6
 |  |
 |  08:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
 |     HDFS partitions=1/1 files=1 size=625B
-|     row-size=24B cardinality=13.74K
+|     row-size=24B cardinality=6
 |
 04:UNION
 |  pass-through-operands: all
-|  row-size=36B cardinality=68.39K
+|  row-size=36B cardinality=20
 |
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-|  |  row-size=36B cardinality=34.20K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  |  row-size=36B cardinality=10
 |  |
-|  |--12:EXCHANGE [BROADCAST]
+|  |--13:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
 |  |  |
 |  |  01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
 |  |     HDFS partitions=1/1 files=2 size=5.33KB
 |  |     row-size=245B cardinality=4
 |  |
+|  12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+|  |
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=2 size=1.22KB
 |     runtime filters: RF000 -> i
-|     row-size=36B cardinality=34.20K
+|     row-size=36B cardinality=10
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=2 size=1.22KB
    runtime filters: RF000 -> i
-   row-size=36B cardinality=34.20K
+   row-size=36B cardinality=10
 ====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
index ec8b7f7a1..56b9ae535 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
@@ -259,11 +259,11 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional_parquet.iceberg_non_partitioned]
    HDFS partitions=1/1 files=3 size=3.41KB
    stored statistics:
-     table: rows=unavailable size=unavailable
+     table: rows=20 size=22.90KB
      columns: unavailable
-   extrapolated-rows=disabled max-scan-range-rows=unavailable
+   extrapolated-rows=disabled max-scan-range-rows=6
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-   tuple-ids=0 row-size=44B cardinality=380
+   tuple-ids=0 row-size=44B cardinality=2
    in pipelines: 00(GETNEXT)
 ====
 # Sampling Iceberg tables. Count(*) is not optimized.
@@ -284,11 +284,11 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional_parquet.iceberg_non_partitioned]
    HDFS partitions=1/1 files=3 size=3.41KB
    stored statistics:
-     table: rows=unavailable size=unavailable
+     table: rows=20 size=22.90KB
      columns: all
-   extrapolated-rows=disabled max-scan-range-rows=unavailable
+   extrapolated-rows=disabled max-scan-range-rows=6
    mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
-   tuple-ids=0 row-size=0B cardinality=380
+   tuple-ids=0 row-size=0B cardinality=2
    in pipelines: 00(GETNEXT)
 ====
 # Sampling partitioned Iceberg tables.
@@ -303,11 +303,11 @@ PLAN-ROOT SINK
 00:SCAN HDFS [functional_parquet.iceberg_partitioned]
    HDFS partitions=1/1 files=10 size=11.46KB
    stored statistics:
-     table: rows=unavailable size=unavailable
+     table: rows=20 size=22.90KB
      columns: unavailable
-   extrapolated-rows=disabled max-scan-range-rows=unavailable
+   extrapolated-rows=disabled max-scan-range-rows=2
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-   tuple-ids=0 row-size=44B cardinality=1.98K
+   tuple-ids=0 row-size=44B cardinality=10
    in pipelines: 00(GETNEXT)
 ====
 # Sampling Iceberg tables with predicates. Predicate pushdown to Iceberg happens
@@ -325,13 +325,13 @@ PLAN-ROOT SINK
    HDFS partitions=1/1 files=4 size=4.57KB
    predicates: action = 'click'
    stored statistics:
-     table: rows=unavailable size=unavailable
+     table: rows=20 size=22.90KB
      columns: unavailable
-   extrapolated-rows=disabled max-scan-range-rows=unavailable
+   extrapolated-rows=disabled max-scan-range-rows=5
    parquet statistics predicates: action = 'click'
    parquet dictionary predicates: action = 'click'
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-   tuple-ids=0 row-size=44B cardinality=198
+   tuple-ids=0 row-size=44B cardinality=1
    in pipelines: 00(GETNEXT)
 ====
 # Sampling Iceberg V2 tables. Delete files are not sampled, only the data files. So we
@@ -370,20 +370,20 @@ PLAN-ROOT SINK
 |  00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
 |     HDFS partitions=1/1 files=1 size=625B
 |     stored statistics:
-|       table: rows=unavailable size=unavailable
+|       table: rows=10 size=7.77KB
 |       columns missing stats: i, s
-|     extrapolated-rows=disabled max-scan-range-rows=unavailable
+|     extrapolated-rows=disabled max-scan-range-rows=10
 |     mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-|     tuple-ids=0 row-size=36B cardinality=2.42K
+|     tuple-ids=0 row-size=36B cardinality=1
 |     in pipelines: 00(GETNEXT)
 |
 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
    HDFS partitions=1/1 files=1 size=620B
    stored statistics:
-     table: rows=unavailable size=unavailable
+     table: rows=10 size=7.77KB
      columns missing stats: i, s
-   extrapolated-rows=disabled max-scan-range-rows=unavailable
+   extrapolated-rows=disabled max-scan-range-rows=10
    mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-   tuple-ids=0 row-size=36B cardinality=2.42K
+   tuple-ids=0 row-size=36B cardinality=1
    in pipelines: 03(GETNEXT)
 ====