Posted to commits@hive.apache.org by dk...@apache.org on 2023/04/19 10:01:36 UTC

[hive] branch master updated: HIVE-27158: Store hive columns stats in puffin files for iceberg tables (Simhadri Govindappa, reviewed by Ayush Saxena, Denys Kuzmenko, Rajesh Balamohan, Zsolt Miskolczi)

This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new a8a0ae782be HIVE-27158: Store hive columns stats in puffin files for iceberg tables (Simhadri Govindappa, reviewed by Ayush Saxena, Denys Kuzmenko, Rajesh Balamohan, Zsolt Miskolczi)
a8a0ae782be is described below

commit a8a0ae782be87d1198006ea3cb508f14070231b7
Author: SimhadriGovindappa <si...@gmail.com>
AuthorDate: Wed Apr 19 15:31:29 2023 +0530

    HIVE-27158: Store hive columns stats in puffin files for iceberg tables (Simhadri Govindappa, reviewed by Ayush Saxena, Denys Kuzmenko, Rajesh Balamohan, Zsolt Miskolczi)
    
    Closes #4131
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   5 +-
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 103 +++-
 .../src/test/queries/positive/col_stats.q          |  58 ++
 .../positive/use_basic_stats_from_iceberg.q        |   4 +-
 .../positive/vectorized_iceberg_read_mixed.q       |   8 +
 .../src/test/results/positive/col_stats.q.out      | 615 +++++++++++++++++++++
 .../positive/dynamic_partition_writes.q.out        |  22 +-
 .../llap/vectorized_iceberg_read_mixed.q.out       | 110 +++-
 .../positive/vectorized_iceberg_read_mixed.q.out   |  71 ++-
 .../hive/ql/metadata/HiveStorageHandler.java       |  41 ++
 .../hadoop/hive/ql/stats/ColStatsProcessor.java    |   3 +
 .../apache/hadoop/hive/ql/stats/StatsUtils.java    |   8 +-
 12 files changed, 1022 insertions(+), 26 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8b666164212..7e6903a39d6 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2205,9 +2205,8 @@ public class HiveConf extends Configuration {
         "padding tolerance config (hive.exec.orc.block.padding.tolerance)."),
     HIVE_ORC_CODEC_POOL("hive.use.orc.codec.pool", false,
         "Whether to use codec pool in ORC. Disable if there are bugs with codec reuse."),
-    HIVE_USE_STATS_FROM("hive.use.stats.from","iceberg","Use stats from iceberg table snapshot for query " +
-        "planning. This has three values metastore, puffin and iceberg"),
-
+    HIVE_ICEBERG_STATS_SOURCE("hive.iceberg.stats.source", "iceberg",
+        "Use stats from iceberg table snapshot for query planning. This has two values metastore and iceberg"),
     HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
         "If this is set the header for RCFiles will simply be RCF.  If this is not\n" +
         "set the header will be that borrowed from sequence files, e.g. SEQ- followed\n" +
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index bcadebbf4c0..db69d6c34c8 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
@@ -35,14 +36,20 @@ import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import org.apache.commons.collections4.ListUtils;
+import org.apache.commons.lang3.SerializationUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.SnapshotContext;
 import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.LockType;
@@ -112,6 +119,12 @@ import org.apache.iceberg.exceptions.NoSuchTableException;
 import org.apache.iceberg.hadoop.HadoopConfigurable;
 import org.apache.iceberg.mr.Catalogs;
 import org.apache.iceberg.mr.InputFormatConfig;
+import org.apache.iceberg.puffin.Blob;
+import org.apache.iceberg.puffin.BlobMetadata;
+import org.apache.iceberg.puffin.Puffin;
+import org.apache.iceberg.puffin.PuffinCompressionCodec;
+import org.apache.iceberg.puffin.PuffinReader;
+import org.apache.iceberg.puffin.PuffinWriter;
 import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.base.Splitter;
@@ -121,7 +134,10 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.relocated.com.google.common.collect.Streams;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ByteBuffers;
+import org.apache.iceberg.util.Pair;
 import org.apache.iceberg.util.SerializationUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -136,6 +152,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   private static final String PUFFIN = "puffin";
   public static final String COPY_ON_WRITE = "copy-on-write";
   public static final String MERGE_ON_READ = "merge-on-read";
+  public static final String STATS = "/stats/";
   /**
    * Function template for producing a custom sort expression function:
    * Takes the source column index and the bucket count to creat a function where Iceberg bucket UDF is used to build
@@ -318,7 +335,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
     // For write queries where rows got modified, don't fetch from cache as values could have changed.
     Table table = getTable(hmsTable);
-    String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
+    String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE).toLowerCase();
     Map<String, String> stats = Maps.newHashMap();
     switch (statsSource) {
       case ICEBERG:
@@ -361,6 +378,90 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return table;
   }
 
+  @Override
+  public boolean canSetColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    return table.currentSnapshot() != null ? getStatsSource().equals(ICEBERG) : false;
+  }
+
+  @Override
+  public boolean setColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable,
+      List<ColumnStatistics> colStats) {
+    Table tbl = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    String snapshotId = String.format("%s-STATS-%d", tbl.name(), tbl.currentSnapshot().snapshotId());
+    invalidateStats(getStatsPath(tbl));
+    byte[] serializeColStats = SerializationUtils.serialize((Serializable) colStats);
+    try (PuffinWriter writer = Puffin.write(tbl.io().newOutputFile(getStatsPath(tbl).toString()))
+        .createdBy(Constants.HIVE_ENGINE).build()) {
+      writer.add(
+          new Blob(
+              tbl.name() + "-" + snapshotId,
+              ImmutableList.of(1),
+              tbl.currentSnapshot().snapshotId(),
+              tbl.currentSnapshot().sequenceNumber(),
+              ByteBuffer.wrap(serializeColStats),
+              PuffinCompressionCodec.NONE,
+              ImmutableMap.of()));
+      writer.finish();
+    } catch (IOException e) {
+      LOG.error(String.valueOf(e));
+    }
+    return false;
+  }
+
+  @Override
+  public boolean canProvideColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    if (canSetColStatistics(hmsTable)) {
+      Path statsPath = getStatsPath(table);
+      try (FileSystem fs = statsPath.getFileSystem(conf)) {
+        if (fs.exists(statsPath)) {
+          return true;
+        }
+      } catch (IOException e) {
+        LOG.warn("Exception when trying to find Iceberg column stats for table:{} , snapshot:{} , " +
+            "statsPath: {} , stack trace: {}", table.name(), table.currentSnapshot(), statsPath, e);
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public List<ColumnStatisticsObj> getColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    String statsPath = getStatsPath(table).toString();
+    LOG.info("Using stats from puffin file at: {}", statsPath);
+    try (PuffinReader reader = Puffin.read(table.io().newInputFile(statsPath)).build()) {
+      List<BlobMetadata> blobMetadata = reader.fileMetadata().blobs();
+      Map<BlobMetadata, List<ColumnStatistics>> collect =
+          Streams.stream(reader.readAll(blobMetadata)).collect(Collectors.toMap(Pair::first,
+              blobMetadataByteBufferPair -> SerializationUtils.deserialize(
+                  ByteBuffers.toByteArray(blobMetadataByteBufferPair.second()))));
+      return collect.get(blobMetadata.get(0)).get(0).getStatsObj();
+    } catch (IOException e) {
+      LOG.error("Error when trying to read iceberg col stats from puffin files: {}", e);
+    }
+    return null;
+  }
+
+  private String getStatsSource() {
+    return HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE, ICEBERG).toLowerCase();
+  }
+
+  private Path getStatsPath(Table table) {
+    return new Path(table.location() + STATS + table.name() + table.currentSnapshot().snapshotId());
+  }
+
+  private void invalidateStats(Path statsPath) {
+    try (FileSystem fs = statsPath.getFileSystem(conf)) {
+      if (fs.exists(statsPath)) {
+        fs.delete(statsPath, true);
+      }
+    } catch (IOException e) {
+      LOG.error("Failed to invalidate stale column stats: {}", e);
+    }
+  }
+
   /**
    * No need for exclusive locks when writing, since Iceberg tables use optimistic concurrency when writing
    * and only lock the table during the commit operation.
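
Taken together, the new methods above persist the Hive ColumnStatistics list as a single serialized blob in a Puffin file under <table location>/stats/, tied to the current snapshot id, and read it back through PuffinReader when column statistics are requested during planning. A condensed, standalone sketch of that write/read round-trip is shown below; the statsLocation argument stands in for getStatsPath(table), and the createdBy label is a placeholder for the Constants.HIVE_ENGINE value used in the patch.

    import java.io.IOException;
    import java.io.Serializable;
    import java.nio.ByteBuffer;
    import java.util.List;
    import org.apache.commons.lang3.SerializationUtils;
    import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.puffin.Blob;
    import org.apache.iceberg.puffin.BlobMetadata;
    import org.apache.iceberg.puffin.Puffin;
    import org.apache.iceberg.puffin.PuffinCompressionCodec;
    import org.apache.iceberg.puffin.PuffinReader;
    import org.apache.iceberg.puffin.PuffinWriter;
    import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
    import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
    import org.apache.iceberg.util.ByteBuffers;
    import org.apache.iceberg.util.Pair;

    // Illustrative round-trip of Hive column stats through a Puffin file,
    // mirroring setColStatistics()/getColStatistics() above.
    final class PuffinStatsRoundTrip {

      // Serialize the stats and write them as one blob keyed to the current snapshot.
      static void writeStats(Table table, String statsLocation, List<ColumnStatistics> colStats) throws IOException {
        byte[] payload = SerializationUtils.serialize((Serializable) colStats);
        try (PuffinWriter writer = Puffin.write(table.io().newOutputFile(statsLocation))
            .createdBy("hive-example")   // the patch passes Constants.HIVE_ENGINE here
            .build()) {
          writer.add(new Blob(
              table.name() + "-stats",                       // blob type label
              ImmutableList.of(1),                           // referenced field ids
              table.currentSnapshot().snapshotId(),
              table.currentSnapshot().sequenceNumber(),
              ByteBuffer.wrap(payload),
              PuffinCompressionCodec.NONE,
              ImmutableMap.of()));
          writer.finish();
        }
      }

      // Read the first blob back and deserialize it into the Hive statistics objects.
      static List<ColumnStatistics> readStats(Table table, String statsLocation) throws IOException {
        try (PuffinReader reader = Puffin.read(table.io().newInputFile(statsLocation)).build()) {
          List<BlobMetadata> blobs = reader.fileMetadata().blobs();
          for (Pair<BlobMetadata, ByteBuffer> entry : reader.readAll(blobs)) {
            List<ColumnStatistics> stats = SerializationUtils.deserialize(ByteBuffers.toByteArray(entry.second()));
            return stats;
          }
          return null;                                       // no stats blob present
        }
      }
    }

Note that, as in the patch, any existing stats file for the table is deleted first (invalidateStats) so that statistics from a stale snapshot are not picked up.
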
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q b/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q
new file mode 100644
index 00000000000..d7c4d811a8b
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q
@@ -0,0 +1,58 @@
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+set hive.stats.autogather=true;
+set hive.stats.column.autogather=true;
+
+set hive.iceberg.stats.source=iceberg;
+drop table if exists tbl_ice_puffin;
+create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice_puffin order by a, b, c;
+select * from tbl_ice_puffin order by a, b, c;
+desc formatted tbl_ice_puffin b;
+update tbl_ice_puffin set b='two' where b='one' or b='three';
+analyze table tbl_ice_puffin  compute statistics for columns;
+explain select * from tbl_ice_puffin order by a, b, c;
+select * from tbl_ice_puffin order by a, b, c;
+select count(*) from tbl_ice_puffin ;
+desc formatted tbl_ice_puffin b;
+
+
+-- Test if hive.iceberg.stats.source is empty
+set hive.iceberg.stats.source= ;
+drop table if exists tbl_ice_puffin;
+create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice_puffin order by a, b, c;
+
+
+set hive.iceberg.stats.source=iceberg;
+drop table if exists tbl_ice_puffin;
+create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice_puffin order by a, b, c;
+select * from tbl_ice_puffin order by a, b, c;
+select count(*) from tbl_ice_puffin ;
+desc formatted tbl_ice_puffin a;
+
+
+set hive.iceberg.stats.source=metastore;
+
+drop table if exists tbl_ice;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+
+set hive.iceberg.stats.source=iceberg;
+delete from tbl_ice_puffin  where  a = 2;
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+
+create table t1 (a int) stored by iceberg tblproperties ('format-version'='2');
+create table t2 (b int) stored by iceberg tblproperties ('format-version'='2');
+describe formatted t1;
+describe formatted t2;
+explain select * from t1 join t2 on t1.a = t2.b;
\ No newline at end of file
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
index 90e2d95d1df..d80f420c42c 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
@@ -4,13 +4,13 @@ set hive.stats.autogather=true;
 set hive.stats.column.autogather=true;
 
 drop table if exists tbl_ice;
-set hive.use.stats.from = metastore;
+set hive.iceberg.stats.source=metastore;
 create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
 insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
 explain select * from tbl_ice order by a, b, c;
 
 drop table if exists tbl_ice;
-set hive.use.stats.from = iceberg;
+set hive.iceberg.stats.source = iceberg;
 create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
 insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
 explain select * from tbl_ice order by a, b, c;
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_mixed.q b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_mixed.q
index b630b1f802d..99069bc266a 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_mixed.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_mixed.q
@@ -46,6 +46,14 @@ explain select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_s
 select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
         group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal;
 
+create external table t1 stored as orc as select * from tbl_ice_mixed_all_types ;
+
+explain select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal;
+select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal;
+
+
 create external table tbl_ice_mixed_parted (
     a int,
     b string
diff --git a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out
new file mode 100644
index 00000000000..b1f13fa76b5
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out
@@ -0,0 +1,615 @@
+PREHOOK: query: drop table if exists tbl_ice_puffin
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice_puffin
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=18 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=18 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=18 width=95)
+                default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	one	50
+1	one	50
+2	two	51
+2	two	51
+2	two	51
+2	two	51
+2	two	51
+2	two	51
+3	three	52
+3	three	52
+4	four	53
+4	four	53
+5	five	54
+5	five	54
+111	one	55
+111	one	55
+333	two	56
+333	two	56
+PREHOOK: query: desc formatted tbl_ice_puffin b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: query: desc formatted tbl_ice_puffin b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+col_name            	b                   
+data_type           	string              
+min                 	                    
+max                 	                    
+num_nulls           	0                   
+distinct_count      	5                   
+avg_col_len         	3.4444444444444446  
+max_col_len         	5                   
+num_trues           	                    
+num_falses          	                    
+bit_vector          	HL                  
+comment             	                    
+COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: update tbl_ice_puffin set b='two' where b='one' or b='three'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: update tbl_ice_puffin set b='two' where b='one' or b='three'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: analyze table tbl_ice_puffin  compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: analyze table tbl_ice_puffin  compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=24 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=24 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=24 width=95)
+                default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	two	50
+1	two	50
+2	two	51
+2	two	51
+2	two	51
+2	two	51
+2	two	51
+2	two	51
+3	two	52
+3	two	52
+4	four	53
+4	four	53
+5	five	54
+5	five	54
+111	two	55
+111	two	55
+333	two	56
+333	two	56
+PREHOOK: query: select count(*) from tbl_ice_puffin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice_puffin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+18
+PREHOOK: query: desc formatted tbl_ice_puffin b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: query: desc formatted tbl_ice_puffin b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+col_name            	b                   
+data_type           	string              
+min                 	                    
+max                 	                    
+num_nulls           	0                   
+distinct_count      	3                   
+avg_col_len         	3.2222222222222223  
+max_col_len         	4                   
+num_trues           	                    
+num_falses          	                    
+bit_vector          	HL                  
+comment             	                    
+COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: drop table if exists tbl_ice_puffin
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: drop table if exists tbl_ice_puffin
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice_puffin
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: drop table if exists tbl_ice_puffin
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	one	50
+2	two	51
+2	two	51
+2	two	51
+3	three	52
+4	four	53
+5	five	54
+111	one	55
+333	two	56
+PREHOOK: query: select count(*) from tbl_ice_puffin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice_puffin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: desc formatted tbl_ice_puffin a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: query: desc formatted tbl_ice_puffin a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+col_name            	a                   
+data_type           	int                 
+min                 	1                   
+max                 	333                 
+num_nulls           	0                   
+distinct_count      	7                   
+avg_col_len         	                    
+max_col_len         	                    
+num_trues           	                    
+num_falses          	                    
+bit_vector          	HL                  
+comment             	                    
+COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	one	50
+2	two	51
+2	two	51
+2	two	51
+3	three	52
+4	four	53
+5	five	54
+111	one	55
+333	two	56
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: delete from tbl_ice_puffin  where  a = 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: delete from tbl_ice_puffin  where  a = 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: describe formatted t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: describe formatted t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name            	data_type           	comment             
+a                   	int                 	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
+	bucketing_version   	2                   
+	current-schema      	{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]}
+	engine.hive.enabled 	true                
+	format-version      	2                   
+	iceberg.orc.files.only	false               
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	serialization.format	1                   
+	snapshot-count      	0                   
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG             
+	totalSize           	0                   
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read       
+	write.merge.mode    	merge-on-read       
+	write.update.mode   	merge-on-read       
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe	 
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat	 
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat	 
+Compressed:         	No                  	 
+Sort Columns:       	[]                  	 
+PREHOOK: query: describe formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: describe formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name            	data_type           	comment             
+b                   	int                 	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}}
+	bucketing_version   	2                   
+	current-schema      	{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"b\",\"required\":false,\"type\":\"int\"}]}
+	engine.hive.enabled 	true                
+	format-version      	2                   
+	iceberg.orc.files.only	false               
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	serialization.format	1                   
+	snapshot-count      	0                   
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG             
+	totalSize           	0                   
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read       
+	write.merge.mode    	merge-on-read       
+	write.update.mode   	merge-on-read       
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe	 
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat	 
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat	 
+Compressed:         	No                  	 
+Sort Columns:       	[]                  	 
+PREHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2
+      File Output Operator [FS_10]
+        Merge Join Operator [MERGEJOIN_25] (rows=1 width=4)
+          Conds:RS_28._col0=RS_31._col0(Inner),Output:["_col0","_col1"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_28]
+            PartitionCols:_col0
+            Select Operator [SEL_27] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_26] (rows=1 width=4)
+                predicate:a is not null
+                TableScan [TS_0] (rows=1 width=4)
+                  default@t1,t1,Tbl:COMPLETE,Col:NONE,Output:["a"]
+        <-Map 3 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_31]
+            PartitionCols:_col0
+            Select Operator [SEL_30] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_29] (rows=1 width=4)
+                predicate:b is not null
+                TableScan [TS_3] (rows=1 width=4)
+                  default@t2,t2,Tbl:COMPLETE,Col:NONE,Output:["b"]
+
diff --git a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
index 2cf955f898c..7e7a5eab1e3 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
@@ -76,9 +76,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:_col1
-                    Select Operator [SEL_12] (rows=22 width=87)
+                    Select Operator [SEL_12] (rows=22 width=91)
                       Output:["_col0","_col1"]
-                      TableScan [TS_0] (rows=22 width=87)
+                      TableScan [TS_0] (rows=22 width=91)
                         default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -90,7 +90,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=400)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)"]
-                        Select Operator [SEL_14] (rows=22 width=87)
+                        Select Operator [SEL_14] (rows=22 width=91)
                           Output:["a","ccy"]
                            Please refer to the previous Select Operator [SEL_12]
 
@@ -170,9 +170,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:iceberg_bucket(_col1, 2)
-                    Select Operator [SEL_12] (rows=22 width=87)
+                    Select Operator [SEL_12] (rows=22 width=91)
                       Output:["_col0","_col1"]
-                      TableScan [TS_0] (rows=22 width=87)
+                      TableScan [TS_0] (rows=22 width=91)
                         default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -184,7 +184,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=400)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)"]
-                        Select Operator [SEL_14] (rows=22 width=87)
+                        Select Operator [SEL_14] (rows=22 width=91)
                           Output:["a","ccy"]
                            Please refer to the previous Select Operator [SEL_12]
 
@@ -264,9 +264,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:_col1, iceberg_bucket(_col2, 3)
-                    Select Operator [SEL_12] (rows=22 width=94)
+                    Select Operator [SEL_12] (rows=22 width=99)
                       Output:["_col0","_col1","_col2"]
-                      TableScan [TS_0] (rows=22 width=94)
+                      TableScan [TS_0] (rows=22 width=99)
                         default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -278,7 +278,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=568)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)","min(c)","max(c)","count(c)","compute_bit_vector_hll(c)"]
-                        Select Operator [SEL_14] (rows=22 width=94)
+                        Select Operator [SEL_14] (rows=22 width=99)
                           Output:["a","ccy","c"]
                            Please refer to the previous Select Operator [SEL_12]
 
@@ -403,7 +403,7 @@ Stage-3
                       Output:["_col0","_col1","_col2"]
                       Filter Operator [FIL_14] (rows=4 width=99)
                         predicate:(b = 'EUR')
-                        TableScan [TS_0] (rows=22 width=94)
+                        TableScan [TS_0] (rows=22 width=99)
                           default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
               Reducer 3 vectorized
               File Output Operator [FS_24]
@@ -461,7 +461,7 @@ Stage-3
                         Output:["_col0","_col1","_col2"]
                         Filter Operator [FIL_12] (rows=1 width=99)
                           predicate:((c = 100L) and (b = 'USD'))
-                          TableScan [TS_0] (rows=22 width=94)
+                          TableScan [TS_0] (rows=22 width=99)
                             default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
                     PARTITION_ONLY_SHUFFLE [RS_17]
                       Group By Operator [GBY_16] (rows=1 width=568)
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
index bdef92e60f6..50ce82dc248 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
@@ -589,13 +589,13 @@ STAGE PLANS:
                     minReductionHashAggr: 0.99
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-                    Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
                       null sort order: zzzzzzzzz
                       sort order: +++++++++
                       Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
-                      Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col9 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
@@ -607,14 +607,116 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double), KEY._col1 (type: boolean), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: binary), KEY._col5 (type: string), KEY._col6 (type: timestamp), KEY._col7 (type: date), KEY._col8 (type: decimal(4,2))
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-                Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col9 (type: float), _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+                  Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+#### A masked pattern was here ####
+POSTHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+#### A masked pattern was here ####
+1.1	1.2	false	4	567890123456789	6	col7	2012-10-03 19:58:08	1234-09-02	10.01
+5.1	6.2	true	40	567890123456780	8	col07	2012-10-03 19:58:09	1234-09-03	10.02
+PREHOOK: query: create external table t1 stored as orc as select * from tbl_ice_mixed_all_types
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create external table t1 stored as orc as select * from tbl_ice_mixed_all_types
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.t_bigint SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_bigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: t1.t_binary SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_binary, type:binary, comment:null), ]
+POSTHOOK: Lineage: t1.t_boolean SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_boolean, type:boolean, comment:null), ]
+POSTHOOK: Lineage: t1.t_date SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_date, type:date, comment:null), ]
+POSTHOOK: Lineage: t1.t_decimal SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_decimal, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: t1.t_double SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_double, type:double, comment:null), ]
+POSTHOOK: Lineage: t1.t_float SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_float, type:float, comment:null), ]
+POSTHOOK: Lineage: t1.t_int SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_int, type:int, comment:null), ]
+POSTHOOK: Lineage: t1.t_string SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_string, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.t_timestamp SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_timestamp, type:timestamp, comment:null), ]
+PREHOOK: query: explain select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+#### A masked pattern was here ####
+POSTHOOK: query: explain select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice_mixed_all_types
                   Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: max(t_float)
+                    keys: t_double (type: double), t_boolean (type: boolean), t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string (type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal (type: decimal(4,2))
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+                    Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
+                      null sort order: zzzzzzzzz
+                      sort order: +++++++++
+                      Map-reduce partition columns: _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
+                      Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col9 (type: float)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0)
+                keys: KEY._col0 (type: double), KEY._col1 (type: boolean), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: binary), KEY._col5 (type: string), KEY._col6 (type: timestamp), KEY._col7 (type: date), KEY._col8 (type: decimal(4,2))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+                Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col9 (type: float), _col0 (type: double), _col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: decimal(4,2))
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+                  Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
index a43950aa6ac..34696284306 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
@@ -538,14 +538,79 @@ Stage-0
     Stage-1
       Reducer 2 vectorized
       File Output Operator [FS_11]
-        Select Operator [SEL_10] (rows=2 width=373)
+        Select Operator [SEL_10] (rows=1 width=373)
           Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-          Group By Operator [GBY_9] (rows=2 width=373)
+          Group By Operator [GBY_9] (rows=1 width=373)
             Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
           <-Map 1 [SIMPLE_EDGE] vectorized
             SHUFFLE [RS_8]
               PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-              Group By Operator [GBY_7] (rows=2 width=373)
+              Group By Operator [GBY_7] (rows=1 width=373)
+                Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+                TableScan [TS_0] (rows=2 width=373)
+                  default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+
+PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1.1	1.2	false	4	567890123456789	6	col7	2012-10-03 19:58:08	1234-09-02	10.01
+5.1	6.2	true	40	567890123456780	8	col07	2012-10-03 19:58:09	1234-09-03	10.02
+PREHOOK: query: create external table t1 stored as orc as select * from tbl_ice_mixed_all_types
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create external table t1 stored as orc as select * from tbl_ice_mixed_all_types
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.t_bigint SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_bigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: t1.t_binary SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_binary, type:binary, comment:null), ]
+POSTHOOK: Lineage: t1.t_boolean SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_boolean, type:boolean, comment:null), ]
+POSTHOOK: Lineage: t1.t_date SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_date, type:date, comment:null), ]
+POSTHOOK: Lineage: t1.t_decimal SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_decimal, type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: t1.t_double SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_double, type:double, comment:null), ]
+POSTHOOK: Lineage: t1.t_float SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_float, type:float, comment:null), ]
+POSTHOOK: Lineage: t1.t_int SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_int, type:int, comment:null), ]
+POSTHOOK: Lineage: t1.t_string SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_string, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.t_timestamp SIMPLE [(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_timestamp, type:timestamp, comment:null), ]
+PREHOOK: query: explain select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_11]
+        Select Operator [SEL_10] (rows=1 width=373)
+          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
+          Group By Operator [GBY_9] (rows=1 width=373)
+            Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
+          <-Map 1 [SIMPLE_EDGE] vectorized
+            SHUFFLE [RS_8]
+              PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+              Group By Operator [GBY_7] (rows=1 width=373)
                 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
                 TableScan [TS_0] (rows=2 width=373)
                   default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index aff2f51cbc1..65e14af478a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -29,6 +29,8 @@ import org.apache.hadoop.hive.common.classification.InterfaceStability;
 import org.apache.hadoop.hive.common.type.SnapshotContext;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.LockType;
@@ -42,6 +44,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec;
 import org.apache.hadoop.hive.ql.parse.TransformSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
@@ -245,6 +248,44 @@ public interface HiveStorageHandler extends Configurable {
     return false;
   }
 
+  /**
+   * Returns column statistics (lower/upper bounds, null value counts, NaN counts, total counts) computed by
+   * the underlying storage handler implementation.
+   * @param table the table whose statistics are requested
+   * @return a list of ColumnStatisticsObj, or null if the handler cannot supply them
+   */
+  default List<ColumnStatisticsObj> getColStatistics(org.apache.hadoop.hive.ql.metadata.Table table) {
+    return null;
+  }
+
+  /**
+   * Sets column statistics for non-native tables.
+   * @param table the table whose statistics are being persisted
+   * @param colStats the column statistics to persist
+   * @return true if the storage handler persisted the statistics
+   */
+  default boolean setColStatistics(org.apache.hadoop.hive.ql.metadata.Table table,
+      List<ColumnStatistics> colStats) {
+    return false;
+  }
+
+  /**
+   * Checks whether the storage handler can provide column statistics for the given table.
+   * @param tbl the table to check
+   * @return true if the storage handler can supply the column statistics
+   */
+  default boolean canProvideColStatistics(org.apache.hadoop.hive.ql.metadata.Table tbl) {
+    return false;
+  }
+
+  /**
+   * Checks whether the storage handler can set column statistics for the given table.
+   * @return true if the storage handler can set the column statistics
+   */
+  default boolean canSetColStatistics(org.apache.hadoop.hive.ql.metadata.Table tbl) {
+    return false;
+  }
+
   /**
    * Check if CTAS and CMV operations should behave in a direct-insert manner (i.e. no move task).
    * <p>
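The hunk above only adds opt-in defaults; a concrete handler has to override them to take effect. As a rough sketch (not part of this patch), a handler extending DefaultStorageHandler could expose its own statistics like this; the class name, the "stats.snapshot.location" marker property and the literal stat values are assumptions made purely for illustration:

    import java.util.Collections;
    import java.util.List;

    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
    import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
    import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
    import org.apache.hadoop.hive.ql.metadata.Table;

    /** Hypothetical handler that serves its own column statistics to the planner. */
    public class StatsAwareStorageHandler extends DefaultStorageHandler {

      @Override
      public boolean canProvideColStatistics(Table tbl) {
        // Claim the capability only when the table actually carries handler-managed stats
        // ("stats.snapshot.location" is a made-up marker property for this sketch).
        return tbl.getParameters().containsKey("stats.snapshot.location");
      }

      @Override
      public List<ColumnStatisticsObj> getColStatistics(Table table) {
        // Build one ColumnStatisticsObj per column from whatever the handler stores;
        // the bounds, null count and NDV below are placeholders.
        LongColumnStatsData idData = new LongColumnStatsData(0L /* numNulls */, 42L /* numDVs */);
        idData.setLowValue(1L);
        idData.setHighValue(1000L);
        return Collections.singletonList(
            new ColumnStatisticsObj("id", "bigint", ColumnStatisticsData.longStats(idData)));
      }
    }

StatsUtils then consumes such a list exactly as it would consume metastore statistics (see the StatsUtils hunk below).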
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
index 95d4b439d16..e2ee8ae07b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
@@ -218,6 +218,9 @@ public class ColStatsProcessor implements IStatsProcessor {
       }
 
       start = System. currentTimeMillis();
+      if (tbl != null && tbl.isNonNative() && tbl.getStorageHandler().canSetColStatistics(tbl)) {
+        tbl.getStorageHandler().setColStatistics(tbl, colStats);
+      }
       db.setPartitionColumnStatistics(request);
       end = System.currentTimeMillis();
       LOG.info("Time taken to update " + colStats.size() + " stats : " + ((end - start)/1000F) + " seconds.");
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 9c1926a747e..a758bcdecd1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1069,8 +1069,12 @@ public class StatsUtils {
     }
     if (fetchColStats && !colStatsToRetrieve.isEmpty()) {
       try {
-        List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics(
-            dbName, tabName, colStatsToRetrieve, false);
+        List<ColumnStatisticsObj> colStat;
+        if (table.isNonNative() && table.getStorageHandler().canProvideColStatistics(table)) {
+          colStat = table.getStorageHandler().getColStatistics(table);
+        } else {
+          colStat = Hive.get().getTableColumnStatistics(dbName, tabName, colStatsToRetrieve, false);
+        }
         stats = convertColStats(colStat, tabName);
       } catch (HiveException e) {
         LOG.error("Failed to retrieve table statistics: ", e);