You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2022/11/07 21:27:18 UTC
[impala] 01/02: IMPALA-11681: Set table stats for the Iceberg table by its partition stats
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit f3504566fb97719eec81771a61785cedc85ba6fa
Author: LPL <li...@sensorsdata.cn>
AuthorDate: Tue Oct 25 12:22:12 2022 +0800
IMPALA-11681: Set table stats for the Iceberg table by its partition stats
For the Iceberg tables, table-level statistics such as numRows can be
computed according to Iceberg partition stats, which is more accurate and
real-time. Obtaining these statistics is independent of
StatsSetupConst.ROW_COUNT and StatsSetupConst.TOTAL_SIZE in HMS. This is
an improvement for estimating the cardinality of the Iceberg tables.
But for now the calculation for V2 Iceberg tables is not accurate; once
IMPALA-11516 (Return better partition stats for V2 tables) is ready, these
stats can be considered as a replacement for the HMS statistics.
Testing:
- Existing tests
- Test on 'On-demand Metadata' mode
- For 'select * from
iceberg_v2_positional_not_all_data_files_have_delete_files where i =
(select max(i) from iceberg_v2_positional_update_all_rows)', the 'Join
Order' and 'Distribution Mode' are the same as when table stats are
present
Change-Id: I3e92d3f25e2a57a64556249410d0af3522598c00
Reviewed-on: http://gerrit.cloudera.org:8080/19168
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
common/fbs/CatalogObjects.fbs | 2 +-
common/protobuf/planner.proto | 2 +-
common/thrift/PlanNodes.thrift | 2 +-
.../org/apache/impala/catalog/FeIcebergTable.java | 48 +++-
.../impala/catalog/IcebergContentFileStore.java | 2 +
.../org/apache/impala/catalog/IcebergTable.java | 11 +-
.../impala/catalog/local/LocalIcebergTable.java | 1 +
.../queries/PlannerTest/iceberg-v2-tables.test | 250 +++++++++++----------
.../queries/PlannerTest/tablesample.test | 36 +--
9 files changed, 198 insertions(+), 156 deletions(-)
diff --git a/common/fbs/CatalogObjects.fbs b/common/fbs/CatalogObjects.fbs
index 973007d2c..8ecfb2f11 100644
--- a/common/fbs/CatalogObjects.fbs
+++ b/common/fbs/CatalogObjects.fbs
@@ -80,7 +80,7 @@ table FbFileDesc {
// Whether this file is erasure-coded
is_ec: bool = false (id: 5);
- // The absolute path of the file, it`s used only when data files are outside of
+ // The absolute path of the file, it's used only when data files are outside of
// the Iceberg table location (IMPALA-11507).
absolute_path: string (id: 6);
}
diff --git a/common/protobuf/planner.proto b/common/protobuf/planner.proto
index 208ebecd4..4e7c8ac63 100644
--- a/common/protobuf/planner.proto
+++ b/common/protobuf/planner.proto
@@ -55,7 +55,7 @@ message HdfsFileSplitPB {
// any consistent hash.
optional int32 partition_path_hash = 9;
- // The absolute path of the file, it`s used only when data files are outside of
+ // The absolute path of the file, it's used only when data files are outside of
// the Iceberg table location (IMPALA-11507).
optional string absolute_path = 10;
}
diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift
index 70e208637..46e8a5a30 100644
--- a/common/thrift/PlanNodes.thrift
+++ b/common/thrift/PlanNodes.thrift
@@ -224,7 +224,7 @@ struct THdfsFileSplit {
// any consistent hash.
9: required i32 partition_path_hash
- // The absolute path of the file, it`s used only when data files are outside of
+ // The absolute path of the file, it's used only when data files are outside of
// the Iceberg table location (IMPALA-11507).
10: optional string absolute_path
}
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
index 0ed334a31..eaf4d24a2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
@@ -311,6 +311,22 @@ public interface FeIcebergTable extends FeFsTable {
return -1;
}
+ /**
+ * Sets 'tableStats_' for the Iceberg table by it's partition stats.
+ * TODO: Now the calculation of V2 Iceberg table is not accurate. After
+ * IMPALA-11516(Return better partition stats for V2 tables) is ready, this method can
+ * be considered to replace
+ * {@link Table#setTableStats(org.apache.hadoop.hive.metastore.api.Table)}.
+ */
+ default void setIcebergTableStats() {
+ Preconditions.checkState(getTTableStats() != null);
+ Preconditions.checkState(getIcebergPartitionStats() != null);
+ if (getTTableStats().getNum_rows() < 0) {
+ getTTableStats().setNum_rows(Utils.calculateNumRows(this));
+ }
+ getTTableStats().setTotal_file_bytes(Utils.calculateFileSizeInBytes(this));
+ }
+
/**
* Utility functions
*/
@@ -397,17 +413,9 @@ public interface FeIcebergTable extends FeFsTable {
result.setSchema(resultSchema);
TResultRowBuilder rowBuilder = new TResultRowBuilder();
- Map<String, TIcebergPartitionStats> nameToStats = table.getIcebergPartitionStats();
- if (table.getNumRows() >= 0) {
- rowBuilder.add(table.getNumRows());
- } else {
- rowBuilder.add(nameToStats.values().stream().mapToLong(
- TIcebergPartitionStats::getNum_rows).sum());
- }
- rowBuilder.add(nameToStats.values().stream().mapToLong(
- TIcebergPartitionStats::getNum_files).sum());
- rowBuilder.addBytes(nameToStats.values().stream().mapToLong(
- TIcebergPartitionStats::getFile_size_in_bytes).sum());
+ rowBuilder.add(table.getNumRows());
+ rowBuilder.add(table.getContentFileStore().getNumFiles());
+ rowBuilder.addBytes(table.getTTableStats().getTotal_file_bytes());
if (!table.isMarkedCached()) {
rowBuilder.add("NOT CACHED");
rowBuilder.add("NOT CACHED");
@@ -436,6 +444,24 @@ public interface FeIcebergTable extends FeFsTable {
return result;
}
+ /**
+ * Calculate num rows for the given iceberg table by it's partition stats.
+ * The result is computed by all DataFiles without any DeleteFile.
+ */
+ public static long calculateNumRows(FeIcebergTable table) {
+ return table.getIcebergPartitionStats().values().stream()
+ .mapToLong(TIcebergPartitionStats::getNum_rows).sum();
+ }
+
+ /**
+ * Calculate file size in bytes for the given iceberg table by it's partition stats.
+ * The result is computed by all ContentFiles, including DataFile and DeleteFile.
+ */
+ public static long calculateFileSizeInBytes(FeIcebergTable table) {
+ return table.getIcebergPartitionStats().values().stream()
+ .mapToLong(TIcebergPartitionStats::getFile_size_in_bytes).sum();
+ }
+
/**
* Get the field schema list of the current PartitionSpec from Iceberg table.
*
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
index 5e84f3227..ee7cc0974 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
@@ -97,6 +97,8 @@ public class IcebergContentFileStore {
public List<FileDescriptor> getDeleteFiles() { return deleteFiles_; }
+ public long getNumFiles() { return dataFiles_.size() + deleteFiles_.size(); }
+
public Iterable<FileDescriptor> getAllFiles() {
return Iterables.concat(dataFiles_, deleteFiles_);
}
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
index 2f1a81174..6d239e8a5 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
@@ -17,16 +17,18 @@
package org.apache.impala.catalog;
+import com.codahale.metrics.Timer;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
-
import org.apache.hadoop.hive.common.StatsSetupConst;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.impala.analysis.IcebergPartitionField;
import org.apache.impala.analysis.IcebergPartitionSpec;
import org.apache.impala.analysis.IcebergPartitionTransform;
@@ -51,10 +53,6 @@ import org.apache.impala.util.IcebergSchemaConverter;
import org.apache.impala.util.IcebergUtil;
import org.apache.thrift.TException;
-import com.codahale.metrics.Timer;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-
/**
* Representation of an Iceberg table in the catalog cache.
*/
@@ -359,6 +357,7 @@ public class IcebergTable extends Table implements FeIcebergTable {
.load(false, msClient, msTable_, true, true, false, null, null,null, reason);
fileStore_ = Utils.loadAllPartition(this);
partitionStats_ = Utils.loadPartitionStats(this);
+ setIcebergTableStats();
loadAllColumnStats(msClient);
} catch (Exception e) {
throw new IcebergTableLoadingException("Error loading metadata for Iceberg table "
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
index dc1c11ef5..1813ce00e 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
@@ -127,6 +127,7 @@ public class LocalIcebergTable extends LocalTable implements FeIcebergTable {
icebergParquetPlainPageSize_ = Utils.getIcebergParquetPlainPageSize(msTable);
icebergParquetDictPageSize_ = Utils.getIcebergParquetDictPageSize(msTable);
partitionStats_ = tableInfo.getIceberg_table().getPartition_stats();
+ setIcebergTableStats();
addVirtualColumns(ref.getVirtualColumns());
}
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
index 621fd7e4a..d9db7f81f 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
@@ -23,7 +23,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional_parquet.iceberg_v2_no_deletes]
HDFS partitions=1/1 files=1 size=625B
predicates: i > 1
- row-size=4B cardinality=400
+ row-size=4B cardinality=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -40,7 +40,7 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional_parquet.iceberg_v2_no_deletes]
HDFS partitions=1/1 files=1 size=625B
predicates: i > 1
- row-size=4B cardinality=400
+ row-size=4B cardinality=1
====
SELECT count(*) from iceberg_v2_delete_positional;
---- PLAN
@@ -59,7 +59,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional]
HDFS partitions=1/1 files=1 size=662B
- row-size=20B cardinality=4.73K
+ row-size=20B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -74,7 +74,7 @@ PLAN-ROOT SINK
| row-size=8B cardinality=1
|
02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| row-size=20B cardinality=4.73K
+| row-size=20B cardinality=3
|
|--04:EXCHANGE [BROADCAST]
| |
@@ -84,14 +84,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional]
HDFS partitions=1/1 files=1 size=662B
- row-size=20B cardinality=4.73K
+ row-size=20B cardinality=3
====
SELECT * from iceberg_v2_delete_positional;
---- PLAN
PLAN-ROOT SINK
|
02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| row-size=40B cardinality=4.73K
+| row-size=40B cardinality=3
|
|--01:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional-POSITION-DELETE-01 functional_parquet.iceberg_v2_delete_positional-position-delete]
| HDFS partitions=1/1 files=1 size=1.54KB
@@ -99,14 +99,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional]
HDFS partitions=1/1 files=1 size=662B
- row-size=40B cardinality=4.73K
+ row-size=40B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| row-size=40B cardinality=4.73K
+| row-size=40B cardinality=3
|
|--03:EXCHANGE [BROADCAST]
| |
@@ -116,14 +116,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional]
HDFS partitions=1/1 files=1 size=662B
- row-size=40B cardinality=4.73K
+ row-size=40B cardinality=3
====
SELECT * from iceberg_v2_positional_delete_all_rows;
---- PLAN
PLAN-ROOT SINK
|
02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| row-size=36B cardinality=8.93K
+| row-size=36B cardinality=3
|
|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_delete_all_rows-position-delete]
| HDFS partitions=1/1 files=1 size=2.60KB
@@ -131,14 +131,14 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows]
HDFS partitions=1/1 files=1 size=625B
- row-size=36B cardinality=8.93K
+ row-size=36B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| row-size=36B cardinality=8.93K
+| row-size=36B cardinality=3
|
|--03:EXCHANGE [BROADCAST]
| |
@@ -148,7 +148,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows]
HDFS partitions=1/1 files=1 size=625B
- row-size=36B cardinality=8.93K
+ row-size=36B cardinality=3
====
SELECT * from iceberg_v2_no_deletes limit 1
---- PLAN
@@ -183,7 +183,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows]
HDFS partitions=1/1 files=1 size=625B
- row-size=36B cardinality=8.93K
+ row-size=36B cardinality=3
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -202,7 +202,7 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows]
HDFS partitions=1/1 files=1 size=625B
- row-size=36B cardinality=8.93K
+ row-size=36B cardinality=3
====
SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files limit 1
---- PLAN
@@ -214,7 +214,7 @@ PLAN-ROOT SINK
| row-size=36B cardinality=1
|
|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| | row-size=36B cardinality=34.20K
+| | row-size=36B cardinality=10
| |
| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
@@ -222,15 +222,15 @@ PLAN-ROOT SINK
| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
-| row-size=36B cardinality=34.20K
+| row-size=36B cardinality=10
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
- row-size=36B cardinality=34.20K
+ row-size=36B cardinality=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
-06:EXCHANGE [UNPARTITIONED]
+07:EXCHANGE [UNPARTITIONED]
| limit: 1
|
04:UNION
@@ -238,22 +238,24 @@ PLAN-ROOT SINK
| limit: 1
| row-size=36B cardinality=1
|
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| | row-size=36B cardinality=34.20K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+| | row-size=36B cardinality=10
| |
-| |--05:EXCHANGE [BROADCAST]
+| |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
| | |
| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
| | row-size=245B cardinality=4
| |
+| 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
-| row-size=36B cardinality=34.20K
+| row-size=36B cardinality=10
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
- row-size=36B cardinality=34.20K
+ row-size=36B cardinality=10
====
SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files
---- PLAN
@@ -261,10 +263,10 @@ PLAN-ROOT SINK
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=68.39K
+| row-size=36B cardinality=20
|
|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| | row-size=36B cardinality=34.20K
+| | row-size=36B cardinality=10
| |
| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
@@ -272,36 +274,38 @@ PLAN-ROOT SINK
| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
-| row-size=36B cardinality=34.20K
+| row-size=36B cardinality=10
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
- row-size=36B cardinality=34.20K
+ row-size=36B cardinality=10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
-06:EXCHANGE [UNPARTITIONED]
+07:EXCHANGE [UNPARTITIONED]
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=68.39K
+| row-size=36B cardinality=20
|
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| | row-size=36B cardinality=34.20K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+| | row-size=36B cardinality=10
| |
-| |--05:EXCHANGE [BROADCAST]
+| |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
| | |
| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
| | row-size=245B cardinality=4
| |
+| 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
-| row-size=36B cardinality=34.20K
+| row-size=36B cardinality=10
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
- row-size=36B cardinality=34.20K
+ row-size=36B cardinality=10
====
SELECT * from iceberg_v2_positional_update_all_rows
---- PLAN
@@ -309,10 +313,10 @@ PLAN-ROOT SINK
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=27.47K
+| row-size=36B cardinality=12
|
|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| | row-size=36B cardinality=13.74K
+| | row-size=36B cardinality=6
| |
| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
| | HDFS partitions=1/1 files=1 size=2.60KB
@@ -320,11 +324,11 @@ PLAN-ROOT SINK
| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
| HDFS partitions=1/1 files=1 size=625B
-| row-size=36B cardinality=13.74K
+| row-size=36B cardinality=6
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
HDFS partitions=1/1 files=1 size=625B
- row-size=36B cardinality=13.74K
+ row-size=36B cardinality=6
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
@@ -332,10 +336,10 @@ PLAN-ROOT SINK
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=27.47K
+| row-size=36B cardinality=12
|
|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| | row-size=36B cardinality=13.74K
+| | row-size=36B cardinality=6
| |
| |--05:EXCHANGE [BROADCAST]
| | |
@@ -345,18 +349,18 @@ PLAN-ROOT SINK
| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
| HDFS partitions=1/1 files=1 size=625B
-| row-size=36B cardinality=13.74K
+| row-size=36B cardinality=6
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
HDFS partitions=1/1 files=1 size=625B
- row-size=36B cardinality=13.74K
+ row-size=36B cardinality=6
====
SELECT * from iceberg_v2_partitioned_position_deletes
---- PLAN
PLAN-ROOT SINK
|
02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| row-size=64B cardinality=4.96K
+| row-size=64B cardinality=20
|
|--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
| HDFS partitions=1/1 files=3 size=9.47KB
@@ -364,24 +368,26 @@ PLAN-ROOT SINK
|
00:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes]
HDFS partitions=1/1 files=3 size=3.48KB
- row-size=64B cardinality=4.96K
+ row-size=64B cardinality=20
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
-04:EXCHANGE [UNPARTITIONED]
+05:EXCHANGE [UNPARTITIONED]
|
-02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| row-size=64B cardinality=4.96K
+02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+| row-size=64B cardinality=20
|
-|--03:EXCHANGE [BROADCAST]
+|--04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.pos,functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.file_path)]
| |
| 01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete]
| HDFS partitions=1/1 files=3 size=9.47KB
| row-size=182B cardinality=10
|
+03:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.file__position,functional_parquet.iceberg_v2_partitioned_position_deletes.input__file__name)]
+|
00:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes]
HDFS partitions=1/1 files=3 size=3.48KB
- row-size=64B cardinality=4.96K
+ row-size=64B cardinality=20
====
SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files
WHERE i > 2
@@ -390,10 +396,10 @@ PLAN-ROOT SINK
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=6.84K
+| row-size=36B cardinality=2
|
|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| | row-size=36B cardinality=3.42K
+| | row-size=36B cardinality=1
| |
| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
@@ -402,39 +408,41 @@ PLAN-ROOT SINK
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
| predicates: i > 2
-| row-size=36B cardinality=3.42K
+| row-size=36B cardinality=1
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
predicates: i > 2
- row-size=36B cardinality=3.42K
+ row-size=36B cardinality=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
-06:EXCHANGE [UNPARTITIONED]
+07:EXCHANGE [UNPARTITIONED]
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=6.84K
+| row-size=36B cardinality=2
|
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| | row-size=36B cardinality=3.42K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+| | row-size=36B cardinality=1
| |
-| |--05:EXCHANGE [BROADCAST]
+| |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
| | |
| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
| | row-size=245B cardinality=4
| |
+| 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
| predicates: i > 2
-| row-size=36B cardinality=3.42K
+| row-size=36B cardinality=1
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
predicates: i > 2
- row-size=36B cardinality=3.42K
+ row-size=36B cardinality=1
====
select * from iceberg_v2_positional_not_all_data_files_have_delete_files for system_version as of 1497619269847778439
minus
@@ -444,22 +452,22 @@ PLAN-ROOT SINK
|
07:HASH JOIN [LEFT ANTI JOIN]
| hash predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-| row-size=16B cardinality=68.39K
+| row-size=16B cardinality=20
|
|--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=1 size=625B
-| row-size=16B cardinality=34.20K
+| row-size=16B cardinality=10
|
05:AGGREGATE [FINALIZE]
| group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-| row-size=16B cardinality=68.39K
+| row-size=16B cardinality=20
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=68.39K
+| row-size=36B cardinality=20
|
|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| | row-size=36B cardinality=34.20K
+| | row-size=36B cardinality=10
| |
| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
@@ -467,56 +475,58 @@ PLAN-ROOT SINK
| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
-| row-size=36B cardinality=34.20K
+| row-size=36B cardinality=10
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
- row-size=36B cardinality=34.20K
+ row-size=36B cardinality=10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
-12:EXCHANGE [UNPARTITIONED]
+13:EXCHANGE [UNPARTITIONED]
|
07:HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
| hash predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-| row-size=16B cardinality=68.39K
+| row-size=16B cardinality=20
|
-|--11:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
+|--12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
| |
| 06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=1 size=625B
-| row-size=16B cardinality=34.20K
+| row-size=16B cardinality=10
|
-10:AGGREGATE [FINALIZE]
+11:AGGREGATE [FINALIZE]
| group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-| row-size=16B cardinality=68.39K
+| row-size=16B cardinality=20
|
-09:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
+10:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)]
|
05:AGGREGATE [STREAMING]
| group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s
-| row-size=16B cardinality=68.39K
+| row-size=16B cardinality=20
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=68.39K
+| row-size=36B cardinality=20
|
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| | row-size=36B cardinality=34.20K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+| | row-size=36B cardinality=10
| |
-| |--08:EXCHANGE [BROADCAST]
+| |--09:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
| | |
| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
| | row-size=245B cardinality=4
| |
+| 08:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
-| row-size=36B cardinality=34.20K
+| row-size=36B cardinality=10
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
- row-size=36B cardinality=34.20K
+ row-size=36B cardinality=10
====
with v as (select i + 1000 as ii, upper(s) as ss from iceberg_v2_positional_not_all_data_files_have_delete_files)
select * from v where ii > 1003;
@@ -525,10 +535,10 @@ PLAN-ROOT SINK
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=6.84K
+| row-size=36B cardinality=2
|
|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| | row-size=36B cardinality=3.42K
+| | row-size=36B cardinality=1
| |
| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
@@ -537,39 +547,41 @@ PLAN-ROOT SINK
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
| predicates: i + 1000 > 1003
-| row-size=36B cardinality=3.42K
+| row-size=36B cardinality=1
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
predicates: i + 1000 > 1003
- row-size=36B cardinality=3.42K
+ row-size=36B cardinality=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
-06:EXCHANGE [UNPARTITIONED]
+07:EXCHANGE [UNPARTITIONED]
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=6.84K
+| row-size=36B cardinality=2
|
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| | row-size=36B cardinality=3.42K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+| | row-size=36B cardinality=1
| |
-| |--05:EXCHANGE [BROADCAST]
+| |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
| | |
| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
| | row-size=245B cardinality=4
| |
+| 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
| predicates: i + 1000 > 1003
-| row-size=36B cardinality=3.42K
+| row-size=36B cardinality=1
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
predicates: i + 1000 > 1003
- row-size=36B cardinality=3.42K
+ row-size=36B cardinality=1
====
select *
from iceberg_v2_positional_not_all_data_files_have_delete_files
@@ -580,7 +592,7 @@ PLAN-ROOT SINK
11:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: i = max(i)
| runtime filters: RF000 <- max(i)
-| row-size=36B cardinality=68.39K
+| row-size=36B cardinality=20
|
|--10:AGGREGATE [FINALIZE]
| | output: max(i)
@@ -588,10 +600,10 @@ PLAN-ROOT SINK
| |
| 09:UNION
| | pass-through-operands: all
-| | row-size=24B cardinality=27.47K
+| | row-size=24B cardinality=12
| |
| |--07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| | | row-size=24B cardinality=13.74K
+| | | row-size=24B cardinality=6
| | |
| | |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
| | | HDFS partitions=1/1 files=1 size=2.60KB
@@ -599,18 +611,18 @@ PLAN-ROOT SINK
| | |
| | 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
| | HDFS partitions=1/1 files=1 size=625B
-| | row-size=24B cardinality=13.74K
+| | row-size=24B cardinality=6
| |
| 08:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
| HDFS partitions=1/1 files=1 size=625B
-| row-size=24B cardinality=13.74K
+| row-size=24B cardinality=6
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=68.39K
+| row-size=36B cardinality=20
|
|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
-| | row-size=36B cardinality=34.20K
+| | row-size=36B cardinality=10
| |
| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
@@ -619,29 +631,29 @@ PLAN-ROOT SINK
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
| runtime filters: RF000 -> i
-| row-size=36B cardinality=34.20K
+| row-size=36B cardinality=10
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
runtime filters: RF000 -> i
- row-size=36B cardinality=34.20K
+ row-size=36B cardinality=10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
-17:EXCHANGE [UNPARTITIONED]
+18:EXCHANGE [UNPARTITIONED]
|
11:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: i = max(i)
| runtime filters: RF000 <- max(i)
-| row-size=36B cardinality=68.39K
+| row-size=36B cardinality=20
|
-|--16:EXCHANGE [BROADCAST]
+|--17:EXCHANGE [BROADCAST]
| |
-| 15:AGGREGATE [FINALIZE]
+| 16:AGGREGATE [FINALIZE]
| | output: max:merge(i)
| | row-size=4B cardinality=1
| |
-| 14:EXCHANGE [UNPARTITIONED]
+| 15:EXCHANGE [UNPARTITIONED]
| |
| 10:AGGREGATE
| | output: max(i)
@@ -649,12 +661,12 @@ PLAN-ROOT SINK
| |
| 09:UNION
| | pass-through-operands: all
-| | row-size=24B cardinality=27.47K
+| | row-size=24B cardinality=12
| |
| |--07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| | | row-size=24B cardinality=13.74K
+| | | row-size=24B cardinality=6
| | |
-| | |--13:EXCHANGE [BROADCAST]
+| | |--14:EXCHANGE [BROADCAST]
| | | |
| | | 06:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete]
| | | HDFS partitions=1/1 files=1 size=2.60KB
@@ -662,32 +674,34 @@ PLAN-ROOT SINK
| | |
| | 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
| | HDFS partitions=1/1 files=1 size=625B
-| | row-size=24B cardinality=13.74K
+| | row-size=24B cardinality=6
| |
| 08:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows]
| HDFS partitions=1/1 files=1 size=625B
-| row-size=24B cardinality=13.74K
+| row-size=24B cardinality=6
|
04:UNION
| pass-through-operands: all
-| row-size=36B cardinality=68.39K
+| row-size=36B cardinality=20
|
-|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST]
-| | row-size=36B cardinality=34.20K
+|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+| | row-size=36B cardinality=10
| |
-| |--12:EXCHANGE [BROADCAST]
+| |--13:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
| | |
| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
| | HDFS partitions=1/1 files=2 size=5.33KB
| | row-size=245B cardinality=4
| |
+| 12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+| |
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=2 size=1.22KB
| runtime filters: RF000 -> i
-| row-size=36B cardinality=34.20K
+| row-size=36B cardinality=10
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=2 size=1.22KB
runtime filters: RF000 -> i
- row-size=36B cardinality=34.20K
+ row-size=36B cardinality=10
====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
index ec8b7f7a1..56b9ae535 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
@@ -259,11 +259,11 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional_parquet.iceberg_non_partitioned]
HDFS partitions=1/1 files=3 size=3.41KB
stored statistics:
- table: rows=unavailable size=unavailable
+ table: rows=20 size=22.90KB
columns: unavailable
- extrapolated-rows=disabled max-scan-range-rows=unavailable
+ extrapolated-rows=disabled max-scan-range-rows=6
mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
- tuple-ids=0 row-size=44B cardinality=380
+ tuple-ids=0 row-size=44B cardinality=2
in pipelines: 00(GETNEXT)
====
# Sampling Iceberg tables. Count(*) is not optimized.
@@ -284,11 +284,11 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional_parquet.iceberg_non_partitioned]
HDFS partitions=1/1 files=3 size=3.41KB
stored statistics:
- table: rows=unavailable size=unavailable
+ table: rows=20 size=22.90KB
columns: all
- extrapolated-rows=disabled max-scan-range-rows=unavailable
+ extrapolated-rows=disabled max-scan-range-rows=6
mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1
- tuple-ids=0 row-size=0B cardinality=380
+ tuple-ids=0 row-size=0B cardinality=2
in pipelines: 00(GETNEXT)
====
# Sampling partitioned Iceberg tables.
@@ -303,11 +303,11 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional_parquet.iceberg_partitioned]
HDFS partitions=1/1 files=10 size=11.46KB
stored statistics:
- table: rows=unavailable size=unavailable
+ table: rows=20 size=22.90KB
columns: unavailable
- extrapolated-rows=disabled max-scan-range-rows=unavailable
+ extrapolated-rows=disabled max-scan-range-rows=2
mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
- tuple-ids=0 row-size=44B cardinality=1.98K
+ tuple-ids=0 row-size=44B cardinality=10
in pipelines: 00(GETNEXT)
====
# Sampling Iceberg tables with predicates. Predicate pushdown to Iceberg happens
@@ -325,13 +325,13 @@ PLAN-ROOT SINK
HDFS partitions=1/1 files=4 size=4.57KB
predicates: action = 'click'
stored statistics:
- table: rows=unavailable size=unavailable
+ table: rows=20 size=22.90KB
columns: unavailable
- extrapolated-rows=disabled max-scan-range-rows=unavailable
+ extrapolated-rows=disabled max-scan-range-rows=5
parquet statistics predicates: action = 'click'
parquet dictionary predicates: action = 'click'
mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
- tuple-ids=0 row-size=44B cardinality=198
+ tuple-ids=0 row-size=44B cardinality=1
in pipelines: 00(GETNEXT)
====
# Sampling Iceberg V2 tables. Delete files are not sampled, only the data files. So we
@@ -370,20 +370,20 @@ PLAN-ROOT SINK
| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
| HDFS partitions=1/1 files=1 size=625B
| stored statistics:
-| table: rows=unavailable size=unavailable
+| table: rows=10 size=7.77KB
| columns missing stats: i, s
-| extrapolated-rows=disabled max-scan-range-rows=unavailable
+| extrapolated-rows=disabled max-scan-range-rows=10
| mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
-| tuple-ids=0 row-size=36B cardinality=2.42K
+| tuple-ids=0 row-size=36B cardinality=1
| in pipelines: 00(GETNEXT)
|
03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
HDFS partitions=1/1 files=1 size=620B
stored statistics:
- table: rows=unavailable size=unavailable
+ table: rows=10 size=7.77KB
columns missing stats: i, s
- extrapolated-rows=disabled max-scan-range-rows=unavailable
+ extrapolated-rows=disabled max-scan-range-rows=10
mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1
- tuple-ids=0 row-size=36B cardinality=2.42K
+ tuple-ids=0 row-size=36B cardinality=1
in pipelines: 03(GETNEXT)
====