You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2022/09/08 22:11:27 UTC

[impala] branch master updated (37f44a58f -> f598b2ad6)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 37f44a58f IMPALA-10918: Allow map type in SELECT list
     new bc92661bd IMPALA-11490: Add more metrics for event processor
     new f598b2ad6 IMPALA-10610: Support multiple file formats in a single Iceberg Table

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exec/hdfs-scan-node-base.cc                 |  93 +++++++----
 be/src/util/event-metrics.cc                       |  98 ++++++++++-
 be/src/util/event-metrics.h                        |  50 +++++-
 common/thrift/JniCatalog.thrift                    |  41 +++--
 common/thrift/metrics.json                         | 114 ++++++++++++-
 .../impala/catalog/events/MetastoreEvents.java     |   2 +
 .../catalog/events/MetastoreEventsProcessor.java   | 100 ++++++++++--
 .../org/apache/impala/planner/HdfsScanNode.java    |   4 +-
 .../org/apache/impala/planner/IcebergScanNode.java |  28 +++-
 testdata/data/README                               |  12 ++
 ...0b6136a-job_16619542960420_0002-1-00001.parquet | Bin 0 -> 872 bytes
 ...e500a19c1d1-job_16619542960420_0003-1-00001.orc | Bin 0 -> 437 bytes
 ...80faff0-job_16619542960420_0004-1-00001.parquet | Bin 0 -> 872 bytes
 ...19c5500ed04-job_16619542960420_0004-1-00001.orc | Bin 0 -> 435 bytes
 .../055baf62-de6d-4583-bf21-f187f9482343-m0.avro}  | Bin 3218 -> 3297 bytes
 .../871d1473-8566-46c0-a530-a2256b3f396f-m0.avro}  | Bin 3218 -> 3297 bytes
 .../a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro}  | Bin 3218 -> 3304 bytes
 .../d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro}  | Bin 3218 -> 3305 bytes
 ...35-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro} | Bin 1986 -> 1989 bytes
 ...59-1-055baf62-de6d-4583-bf21-f187f9482343.avro} | Bin 1992 -> 2379 bytes
 ...54-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro} | Bin 1992 -> 2249 bytes
 ...72-1-871d1473-8566-46c0-a530-a2256b3f396f.avro} | Bin 1992 -> 2119 bytes
 .../metadata/v1.metadata.json                      |  35 ++--
 .../metadata/v2.metadata.json                      |  94 +++++++++++
 .../metadata/v3.metadata.json}                     |  63 ++++---
 .../metadata/v4.metadata.json                      | 125 ++++++++++++++
 .../metadata/v5.metadata.json                      | 128 +++++++++++++++
 .../metadata/v6.metadata.json                      | 153 +++++++++++++++++
 .../metadata/v7.metadata.json                      | 156 ++++++++++++++++++
 .../metadata/v8.metadata.json                      | 181 +++++++++++++++++++++
 .../metadata/version-hint.txt                      |   1 +
 .../QueryTest/iceberg-mixed-file-format.test       |  11 ++
 tests/query_test/test_iceberg.py                   |   5 +
 33 files changed, 1369 insertions(+), 125 deletions(-)
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-ee500a19c1d1-job_16619542960420_0003-1-00001.orc
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-27ff880faff0-job_16619542960420_0004-1-00001.parquet
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-519c5500ed04-job_16619542960420_0004-1-00001.orc
 copy testdata/data/iceberg_test/{iceberg_migrated_complex_test/metadata/152e384f-2851-44b7-9ada-1bfbec74e9fc-m0.avro => iceberg_mixed_file_format_test/metadata/055baf62-de6d-4583-bf21-f187f9482343-m0.avro} (89%)
 copy testdata/data/iceberg_test/{iceberg_migrated_complex_test/metadata/152e384f-2851-44b7-9ada-1bfbec74e9fc-m0.avro => iceberg_mixed_file_format_test/metadata/871d1473-8566-46c0-a530-a2256b3f396f-m0.avro} (89%)
 copy testdata/data/iceberg_test/{iceberg_migrated_complex_test/metadata/152e384f-2851-44b7-9ada-1bfbec74e9fc-m0.avro => iceberg_mixed_file_format_test/metadata/a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro} (89%)
 copy testdata/data/iceberg_test/{iceberg_migrated_complex_test/metadata/152e384f-2851-44b7-9ada-1bfbec74e9fc-m0.avro => iceberg_mixed_file_format_test/metadata/d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro} (89%)
 copy testdata/data/iceberg_test/{iceberg_migrated_alter_test/metadata/snap-2941076094076108396-1-c9f83a82-60f4-443b-9ca4-359cad16fe12.avro => iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro} (90%)
 copy testdata/data/iceberg_test/{iceberg_migrated_complex_test_orc/metadata/snap-3622599918649152504-1-8588fd4b-13c1-4451-80ad-5cf71a959b94.avro => iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro} (75%)
 copy testdata/data/iceberg_test/{iceberg_migrated_complex_test_orc/metadata/snap-3622599918649152504-1-8588fd4b-13c1-4451-80ad-5cf71a959b94.avro => iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro} (79%)
 copy testdata/data/iceberg_test/{iceberg_migrated_complex_test_orc/metadata/snap-3622599918649152504-1-8588fd4b-13c1-4451-80ad-5cf71a959b94.avro => iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro} (84%)
 copy testdata/data/iceberg_test/{iceberg_migrated_alter_test => iceberg_mixed_file_format_test}/metadata/v1.metadata.json (57%)
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v2.metadata.json
 copy testdata/data/iceberg_test/{iceberg_migrated_alter_test_orc/metadata/v2.metadata.json => iceberg_mixed_file_format_test/metadata/v3.metadata.json} (51%)
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v4.metadata.json
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v5.metadata.json
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v6.metadata.json
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v7.metadata.json
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v8.metadata.json
 create mode 100644 testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/version-hint.txt
 create mode 100644 testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test


[impala] 01/02: IMPALA-11490: Add more metrics for event processor

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit bc92661bd3105cb378a3d140e247207959916d16
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Sun Sep 4 09:25:17 2022 +0800

    IMPALA-11490: Add more metrics for event processor
    
    This patch adds more metrics to help debug event processing that lags behind.
    The latest event id in HMS is added so users can compare it with the
    last synced event id to know how many events are waiting to be synced.
    The event time of the last synced event and latest event in HMS are also
    added. Users can compare them to know how long catalogd is lagging
    behind. The latest event id and event time are updated in a
    dedicated thread in case the event-processor thread is blocked by slow
    metadata reloading or waiting for table locks.
    
    This patch also fixes the incorrect metrics for event fetching and
    processing durations. Previously we used Timer.getMeanRate(), which
    returns the mean rate at which durations are recorded rather than the
    mean duration itself. The correct method is
    Timer.getSnapshot().getMean(). By getting the snapshot, we can also
    expose metrics of the 75th/95th/99th percentiles.
    
    To facilitate metrics collection, the last durations of events fetching
    and processing are also exposed.
    
    Tests:
     - Manually verified the metrics when running some Hive workloads
    
    Change-Id: I0e7d40a0d8e140e6b0698936e97b454cb9abdc1b
    Reviewed-on: http://gerrit.cloudera.org:8080/18937
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/event-metrics.cc                       |  98 +++++++++++++++++-
 be/src/util/event-metrics.h                        |  50 ++++++++-
 common/thrift/JniCatalog.thrift                    |  41 ++++++--
 common/thrift/metrics.json                         | 114 ++++++++++++++++++++-
 .../impala/catalog/events/MetastoreEvents.java     |   2 +
 .../catalog/events/MetastoreEventsProcessor.java   | 100 ++++++++++++++++--
 6 files changed, 375 insertions(+), 30 deletions(-)

diff --git a/be/src/util/event-metrics.cc b/be/src/util/event-metrics.cc
index c878f0ca0..061025c6d 100644
--- a/be/src/util/event-metrics.cc
+++ b/be/src/util/event-metrics.cc
@@ -36,9 +36,25 @@ string MetastoreEventMetrics::NUMBER_EVENTS_SKIPPED_METRIC_NAME =
 string MetastoreEventMetrics::EVENT_PROCESSOR_STATUS_METRIC_NAME =
     "events-processor.status";
 string MetastoreEventMetrics::EVENTS_FETCH_DURATION_MEAN_METRIC_NAME =
-    "events-processor.avg-events-fetch-duration";
+    "events-processor.events-fetch-duration-avg";
+string MetastoreEventMetrics::EVENTS_FETCH_DURATION_P75_METRIC_NAME =
+    "events-processor.events-fetch-duration-p75";
+string MetastoreEventMetrics::EVENTS_FETCH_DURATION_P95_METRIC_NAME =
+    "events-processor.events-fetch-duration-p95";
+string MetastoreEventMetrics::EVENTS_FETCH_DURATION_P99_METRIC_NAME =
+    "events-processor.events-fetch-duration-p99";
+string MetastoreEventMetrics::EVENTS_FETCH_LAST_DURATION_METRIC_NAME =
+    "events-processor.events-fetch-duration-latest";
 string MetastoreEventMetrics::EVENTS_PROCESS_DURATION_MEAN_METRIC_NAME =
-    "events-processor.avg-events-process-duration";
+    "events-processor.events-process-duration-avg";
+string MetastoreEventMetrics::EVENTS_PROCESS_DURATION_P75_METRIC_NAME =
+    "events-processor.events-process-duration-p75";
+string MetastoreEventMetrics::EVENTS_PROCESS_DURATION_P95_METRIC_NAME =
+    "events-processor.events-process-duration-p95";
+string MetastoreEventMetrics::EVENTS_PROCESS_DURATION_P99_METRIC_NAME =
+    "events-processor.events-process-duration-p99";
+string MetastoreEventMetrics::EVENTS_PROCESS_LAST_DURATION_METRIC_NAME =
+    "events-processor.events-process-duration-latest";
 
 string MetastoreEventMetrics::EVENTS_RECEIVED_1MIN_METRIC_NAME =
     "events-processor.events-received-1min-rate";
@@ -48,12 +64,27 @@ string MetastoreEventMetrics::EVENTS_RECEIVED_15MIN_METRIC_NAME =
     "events-processor.events-received-15min-rate";
 string MetastoreEventMetrics::LAST_SYNCED_EVENT_ID_METRIC_NAME =
     "events-processor.last-synced-event-id";
+string MetastoreEventMetrics::LAST_SYNCED_EVENT_TIME_METRIC_NAME =
+    "events-processor.last-synced-event-time";
+string MetastoreEventMetrics::LATEST_EVENT_ID_METRIC_NAME =
+    "events-processor.latest-event-id";
+string MetastoreEventMetrics::LATEST_EVENT_TIME_METRIC_NAME =
+    "events-processor.latest-event-time";
 
 IntCounter* MetastoreEventMetrics::NUM_EVENTS_RECEIVED_COUNTER = nullptr;
 IntCounter* MetastoreEventMetrics::NUM_EVENTS_SKIPPED_COUNTER = nullptr;
 
 DoubleGauge* MetastoreEventMetrics::EVENTS_FETCH_DURATION_MEAN = nullptr;
+DoubleGauge* MetastoreEventMetrics::EVENTS_FETCH_DURATION_P75 = nullptr;
+DoubleGauge* MetastoreEventMetrics::EVENTS_FETCH_DURATION_P95 = nullptr;
+DoubleGauge* MetastoreEventMetrics::EVENTS_FETCH_DURATION_P99 = nullptr;
+DoubleGauge* MetastoreEventMetrics::EVENTS_FETCH_LAST_DURATION = nullptr;
+
 DoubleGauge* MetastoreEventMetrics::EVENTS_PROCESS_DURATION_MEAN = nullptr;
+DoubleGauge* MetastoreEventMetrics::EVENTS_PROCESS_DURATION_P75 = nullptr;
+DoubleGauge* MetastoreEventMetrics::EVENTS_PROCESS_DURATION_P95 = nullptr;
+DoubleGauge* MetastoreEventMetrics::EVENTS_PROCESS_DURATION_P99 = nullptr;
+DoubleGauge* MetastoreEventMetrics::EVENTS_PROCESS_LAST_DURATION = nullptr;
 
 StringProperty* MetastoreEventMetrics::EVENT_PROCESSOR_STATUS = nullptr;
 
@@ -61,6 +92,9 @@ DoubleGauge* MetastoreEventMetrics::EVENTS_RECEIVED_1MIN_RATE = nullptr;
 DoubleGauge* MetastoreEventMetrics::EVENTS_RECEIVED_5MIN_RATE = nullptr;
 DoubleGauge* MetastoreEventMetrics::EVENTS_RECEIVED_15MIN_RATE = nullptr;
 IntCounter* MetastoreEventMetrics::LAST_SYNCED_EVENT_ID = nullptr;
+IntCounter* MetastoreEventMetrics::LAST_SYNCED_EVENT_TIME = nullptr;
+IntCounter* MetastoreEventMetrics::LATEST_EVENT_ID = nullptr;
+IntCounter* MetastoreEventMetrics::LATEST_EVENT_TIME = nullptr;
 
 // Initialize all the metrics for the events metric group
 void MetastoreEventMetrics::InitMetastoreEventMetrics(MetricGroup* metric_group) {
@@ -75,10 +109,29 @@ void MetastoreEventMetrics::InitMetastoreEventMetrics(MetricGroup* metric_group)
       event_metrics->AddCounter(NUMBER_EVENTS_RECEIVED_METRIC_NAME, 0);
   NUM_EVENTS_SKIPPED_COUNTER =
       event_metrics->AddCounter(NUMBER_EVENTS_SKIPPED_METRIC_NAME, 0);
+
   EVENTS_FETCH_DURATION_MEAN =
       event_metrics->AddDoubleGauge(EVENTS_FETCH_DURATION_MEAN_METRIC_NAME, 0.0);
+  EVENTS_FETCH_DURATION_P75 =
+      event_metrics->AddDoubleGauge(EVENTS_FETCH_DURATION_P75_METRIC_NAME, 0.0);
+  EVENTS_FETCH_DURATION_P95 =
+      event_metrics->AddDoubleGauge(EVENTS_FETCH_DURATION_P95_METRIC_NAME, 0.0);
+  EVENTS_FETCH_DURATION_P99 =
+      event_metrics->AddDoubleGauge(EVENTS_FETCH_DURATION_P99_METRIC_NAME, 0.0);
+  EVENTS_FETCH_LAST_DURATION =
+      event_metrics->AddDoubleGauge(EVENTS_FETCH_LAST_DURATION_METRIC_NAME, 0.0);
+
   EVENTS_PROCESS_DURATION_MEAN =
       event_metrics->AddDoubleGauge(EVENTS_PROCESS_DURATION_MEAN_METRIC_NAME, 0.0);
+  EVENTS_PROCESS_DURATION_P75 =
+      event_metrics->AddDoubleGauge(EVENTS_PROCESS_DURATION_P75_METRIC_NAME, 0.0);
+  EVENTS_PROCESS_DURATION_P95 =
+      event_metrics->AddDoubleGauge(EVENTS_PROCESS_DURATION_P95_METRIC_NAME, 0.0);
+  EVENTS_PROCESS_DURATION_P99 =
+      event_metrics->AddDoubleGauge(EVENTS_PROCESS_DURATION_P99_METRIC_NAME, 0.0);
+  EVENTS_PROCESS_LAST_DURATION =
+      event_metrics->AddDoubleGauge(EVENTS_PROCESS_LAST_DURATION_METRIC_NAME, 0.0);
+
   EVENTS_RECEIVED_1MIN_RATE =
       event_metrics->AddDoubleGauge(EVENTS_RECEIVED_1MIN_METRIC_NAME, 0.0);
   EVENTS_RECEIVED_5MIN_RATE =
@@ -87,6 +140,12 @@ void MetastoreEventMetrics::InitMetastoreEventMetrics(MetricGroup* metric_group)
       event_metrics->AddDoubleGauge(EVENTS_RECEIVED_15MIN_METRIC_NAME, 0.0);
   LAST_SYNCED_EVENT_ID =
       event_metrics->AddCounter(LAST_SYNCED_EVENT_ID_METRIC_NAME, 0);
+  LAST_SYNCED_EVENT_TIME =
+      event_metrics->AddCounter(LAST_SYNCED_EVENT_TIME_METRIC_NAME, 0);
+  LATEST_EVENT_ID =
+      event_metrics->AddCounter(LATEST_EVENT_ID_METRIC_NAME, 0);
+  LATEST_EVENT_TIME =
+      event_metrics->AddCounter(LATEST_EVENT_TIME_METRIC_NAME, 0);
 }
 
 void MetastoreEventMetrics::refresh(TEventProcessorMetrics* response) {
@@ -106,9 +165,33 @@ void MetastoreEventMetrics::refresh(TEventProcessorMetrics* response) {
   if (response->__isset.events_fetch_duration_mean) {
     EVENTS_FETCH_DURATION_MEAN->SetValue(response->events_fetch_duration_mean);
   }
+  if (response->__isset.events_fetch_duration_p75) {
+    EVENTS_FETCH_DURATION_P75->SetValue(response->events_fetch_duration_p75);
+  }
+  if (response->__isset.events_fetch_duration_p95) {
+    EVENTS_FETCH_DURATION_P95->SetValue(response->events_fetch_duration_p95);
+  }
+  if (response->__isset.events_fetch_duration_p99) {
+    EVENTS_FETCH_DURATION_P99->SetValue(response->events_fetch_duration_p99);
+  }
+  if (response->__isset.last_events_fetch_duration) {
+    EVENTS_FETCH_LAST_DURATION->SetValue(response->last_events_fetch_duration);
+  }
   if (response->__isset.events_process_duration_mean) {
     EVENTS_PROCESS_DURATION_MEAN->SetValue(response->events_process_duration_mean);
   }
+  if (response->__isset.events_process_duration_p75) {
+    EVENTS_PROCESS_DURATION_P75->SetValue(response->events_process_duration_p75);
+  }
+  if (response->__isset.events_process_duration_p95) {
+    EVENTS_PROCESS_DURATION_P95->SetValue(response->events_process_duration_p95);
+  }
+  if (response->__isset.events_process_duration_p99) {
+    EVENTS_PROCESS_DURATION_P99->SetValue(response->events_process_duration_p99);
+  }
+  if (response->__isset.last_events_process_duration) {
+    EVENTS_PROCESS_LAST_DURATION->SetValue(response->last_events_process_duration);
+  }
   if (response->__isset.events_received_1min_rate) {
     EVENTS_RECEIVED_1MIN_RATE->SetValue(response->events_received_1min_rate);
   }
@@ -118,8 +201,17 @@ void MetastoreEventMetrics::refresh(TEventProcessorMetrics* response) {
   if (response->__isset.events_received_15min_rate) {
     EVENTS_RECEIVED_15MIN_RATE->SetValue(response->events_received_15min_rate);
   }
-  if(response->__isset.last_synced_event_id){
+  if (response->__isset.last_synced_event_id) {
     LAST_SYNCED_EVENT_ID->SetValue(response->last_synced_event_id);
   }
+  if (response->__isset.last_synced_event_time) {
+    LAST_SYNCED_EVENT_TIME->SetValue(response->last_synced_event_time);
+  }
+  if (response->__isset.latest_event_id) {
+    LATEST_EVENT_ID->SetValue(response->latest_event_id);
+  }
+  if (response->__isset.latest_event_time) {
+    LATEST_EVENT_TIME->SetValue(response->latest_event_time);
+  }
 }
 } // namespace impala
diff --git a/be/src/util/event-metrics.h b/be/src/util/event-metrics.h
index e6fa1e1df..e314c9bcf 100644
--- a/be/src/util/event-metrics.h
+++ b/be/src/util/event-metrics.h
@@ -42,11 +42,23 @@ class MetastoreEventMetrics {
   /// Total number of events skipped so far
   static IntCounter* NUM_EVENTS_SKIPPED_COUNTER;
 
-  /// Mean duration required to fetch a batch of events
+  /// Mean/p75/p95/p99 duration required to fetch a batch of events
   static DoubleGauge* EVENTS_FETCH_DURATION_MEAN;
+  static DoubleGauge* EVENTS_FETCH_DURATION_P75;
+  static DoubleGauge* EVENTS_FETCH_DURATION_P95;
+  static DoubleGauge* EVENTS_FETCH_DURATION_P99;
 
-  /// Mean duration required to process the fetched batch of events
+  /// Duration of fetching the last event batch
+  static DoubleGauge* EVENTS_FETCH_LAST_DURATION;
+
+  /// Mean/p75/p95/p99 duration required to process the fetched batch of events
   static DoubleGauge* EVENTS_PROCESS_DURATION_MEAN;
+  static DoubleGauge* EVENTS_PROCESS_DURATION_P75;
+  static DoubleGauge* EVENTS_PROCESS_DURATION_P95;
+  static DoubleGauge* EVENTS_PROCESS_DURATION_P99;
+
+  /// Duration of processing the last event batch
+  static DoubleGauge* EVENTS_PROCESS_LAST_DURATION;
 
   /// The current status of Metastore events processor.
   /// See MetastoreEventProcessor.EventProcessorStatus for possible state values
@@ -64,6 +76,15 @@ class MetastoreEventMetrics {
   /// Last metastore event id that the catalog server synced to.
   static IntCounter* LAST_SYNCED_EVENT_ID;
 
+  /// Last metastore event time that the catalog server synced to.
+  static IntCounter* LAST_SYNCED_EVENT_TIME;
+
+  /// Latest metastore event id
+  static IntCounter* LATEST_EVENT_ID;
+
+  /// Latest metastore event time
+  static IntCounter* LATEST_EVENT_TIME;
+
  private:
   /// Following metric names must match with the key in metrics.json
 
@@ -76,11 +97,23 @@ class MetastoreEventMetrics {
   /// metric name for event processor status
   static std::string EVENT_PROCESSOR_STATUS_METRIC_NAME;
 
-  /// metric name for the mean time taken for events fetch metric
+  /// metric name for the mean/p75/p95/p99 time taken for events fetch metric
   static std::string EVENTS_FETCH_DURATION_MEAN_METRIC_NAME;
+  static std::string EVENTS_FETCH_DURATION_P75_METRIC_NAME;
+  static std::string EVENTS_FETCH_DURATION_P95_METRIC_NAME;
+  static std::string EVENTS_FETCH_DURATION_P99_METRIC_NAME;
 
-  /// metric name for the mean time taken for events processing metric
+  /// metric name for the duration of fetching the last event batch
+  static std::string EVENTS_FETCH_LAST_DURATION_METRIC_NAME;
+
+  /// metric name for the mean/p75/p95/p99 time taken for events processing metric
   static std::string EVENTS_PROCESS_DURATION_MEAN_METRIC_NAME;
+  static std::string EVENTS_PROCESS_DURATION_P75_METRIC_NAME;
+  static std::string EVENTS_PROCESS_DURATION_P95_METRIC_NAME;
+  static std::string EVENTS_PROCESS_DURATION_P99_METRIC_NAME;
+
+  /// metric name for the duration of processing the last event batch
+  static std::string EVENTS_PROCESS_LAST_DURATION_METRIC_NAME;
 
   /// metric name for EWMA of number of events in last 1 min
   static std::string EVENTS_RECEIVED_1MIN_METRIC_NAME;
@@ -93,6 +126,15 @@ class MetastoreEventMetrics {
 
   /// Metric name for last metastore event id that the catalog server synced to.
   static std::string LAST_SYNCED_EVENT_ID_METRIC_NAME;
+
+  /// Metric name for the event time of the last synced metastore event
+  static std::string LAST_SYNCED_EVENT_TIME_METRIC_NAME;
+
+  /// Metric name for the latest metastore event id
+  static std::string LATEST_EVENT_ID_METRIC_NAME;
+
+  /// Metric name for the event time of the latest metastore event
+  static std::string LATEST_EVENT_TIME_METRIC_NAME;
 };
 
 } // namespace impala
diff --git a/common/thrift/JniCatalog.thrift b/common/thrift/JniCatalog.thrift
index 24cd7856d..7f68dfaf3 100644
--- a/common/thrift/JniCatalog.thrift
+++ b/common/thrift/JniCatalog.thrift
@@ -868,26 +868,47 @@ struct TEventProcessorMetrics {
   // Total number of events skipped so far
   3: optional i64 events_skipped
 
-  // Mean time in sec for the fetching metastore events
+  // Time in sec for the fetching metastore events
   4: optional double events_fetch_duration_mean
+  5: optional double events_fetch_duration_p75
+  6: optional double events_fetch_duration_p95
+  7: optional double events_fetch_duration_p99
 
-  // Mean time in sec for processing a given batch of events
-  5: optional double events_process_duration_mean
+  // Duration in sec for fetching the last event batch
+  8: optional double last_events_fetch_duration
 
-  // Average number of events received in 1 min
-  6: optional double events_received_1min_rate
+  // Time in sec for processing a given batch of events
+  9: optional double events_process_duration_mean
+  10: optional double events_process_duration_p75
+  11: optional double events_process_duration_p95
+  12: optional double events_process_duration_p99
 
-  // Average number of events received in 1 min
-  7: optional double events_received_5min_rate
+  // Duration in sec for processing the last event batch
+  13: optional double last_events_process_duration
 
   // Average number of events received in 1 min
-  8: optional double events_received_15min_rate
+  14: optional double events_received_1min_rate
+
+  // Average number of events received in 5 min
+  15: optional double events_received_5min_rate
+
+  // Average number of events received in 15 min
+  16: optional double events_received_15min_rate
 
   // Average number events skipped in a polling interval
-  9: optional double events_skipped_per_poll_mean
+  17: optional double events_skipped_per_poll_mean
 
   // Last metastore event id that the catalog server synced to
-  10: optional i64 last_synced_event_id
+  18: optional i64 last_synced_event_id
+
+  // Event time of the last synced event
+  19: optional i64 last_synced_event_time
+
+  // Latest metastore event id
+  20: optional i64 latest_event_id
+
+  // Event time of the latest metastore event
+  21: optional i64 latest_event_time
 }
 
 struct TCatalogHmsCacheApiMetrics {
diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index 5780c838d..d84453ca8 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -2690,7 +2690,47 @@
     "label": "Average duration to fetch metastore events",
     "units": "TIME_S",
     "kind": "GAUGE",
-    "key": "events-processor.avg-events-fetch-duration"
+    "key": "events-processor.events-fetch-duration-avg"
+  },
+  {
+    "description": "75th percentile of the time taken to fetch a batch of metastore events",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "P75 duration to fetch metastore events",
+    "units": "TIME_S",
+    "kind": "GAUGE",
+    "key": "events-processor.events-fetch-duration-p75"
+  },
+  {
+    "description": "95th percentile of the time taken to fetch a batch of metastore events",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "P95 duration to fetch metastore events",
+    "units": "TIME_S",
+    "kind": "GAUGE",
+    "key": "events-processor.events-fetch-duration-p95"
+  },
+  {
+    "description": "99th percentile of the time taken to fetch a batch of metastore events",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "P99 duration to fetch metastore events",
+    "units": "TIME_S",
+    "kind": "GAUGE",
+    "key": "events-processor.events-fetch-duration-p99"
+  },
+  {
+    "description": "Last time taken to fetch a batch of metastore events",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "Last duration to fetch a batch of metastore events",
+    "units": "TIME_S",
+    "kind": "GAUGE",
+    "key": "events-processor.events-fetch-duration-latest"
   },
   {
     "description": "Average time taken to process a batch of events received from metastore",
@@ -2700,7 +2740,47 @@
     "label": "Average duration to process a batch of metastore events",
     "units": "TIME_S",
     "kind": "GAUGE",
-    "key": "events-processor.avg-events-process-duration"
+    "key": "events-processor.events-process-duration-avg"
+  },
+  {
+    "description": "75th percentile of the time taken to process a batch of events received from metastore",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "P75 duration to process a batch of metastore events",
+    "units": "TIME_S",
+    "kind": "GAUGE",
+    "key": "events-processor.events-process-duration-p75"
+  },
+  {
+    "description": "95th percentile of the time taken to process a batch of events received from metastore",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "P95 duration to process a batch of metastore events",
+    "units": "TIME_S",
+    "kind": "GAUGE",
+    "key": "events-processor.events-process-duration-p95"
+  },
+  {
+    "description": "99th percentile of the time taken to process a batch of events received from metastore",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "P99 duration to process a batch of metastore events",
+    "units": "TIME_S",
+    "kind": "GAUGE",
+    "key": "events-processor.events-process-duration-p99"
+  },
+  {
+    "description": "Last time taken to process a batch of events received from metastore",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "Last duration to process a batch of metastore events",
+    "units": "TIME_S",
+    "kind": "GAUGE",
+    "key": "events-processor.events-process-duration-latest"
   },
   {
     "description": "Exponentially weighted moving average (EWMA) of number of events received in last 1 min",
@@ -2742,6 +2822,36 @@
     "kind" : "COUNTER",
     "key" : "events-processor.last-synced-event-id"
   },
+  {
+    "description": "Last metastore event time that the catalog server processed and synced to",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "Last Synced Event Time",
+    "units": "NONE",
+    "kind" : "COUNTER",
+    "key" : "events-processor.last-synced-event-time"
+  },
+  {
+    "description": "Latest event id in Hive metastore",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "Latest Event Id",
+    "units": "NONE",
+    "kind" : "COUNTER",
+    "key" : "events-processor.latest-event-id"
+  },
+  {
+    "description": "Event time of the latest event in Hive metastore",
+    "contexts": [
+      "CATALOGSERVER"
+    ],
+    "label": "Latest Event Time",
+    "units": "NONE",
+    "kind" : "COUNTER",
+    "key" : "events-processor.latest-event-time"
+  },
   {
     "description": "Total number of executor groups that have at least one executor",
     "contexts": [
diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index ec788b009..2c8e6c98d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -470,6 +470,8 @@ public class MetastoreEvents {
 
     public long getEventId() { return eventId_; }
 
+    public long getEventTime() { return event_.getEventTime(); }
+
     public MetastoreEventType getEventType() { return eventType_; }
 
     /**
diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEventsProcessor.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEventsProcessor.java
index 5cfc2df13..264caab4e 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEventsProcessor.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEventsProcessor.java
@@ -18,6 +18,7 @@
 package org.apache.impala.catalog.events;
 
 import com.codahale.metrics.Gauge;
+import com.codahale.metrics.Snapshot;
 import com.codahale.metrics.Timer;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
@@ -49,6 +50,7 @@ import org.apache.impala.catalog.events.MetastoreEvents.DropDatabaseEvent;
 import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEvent;
 import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventFactory;
 import org.apache.impala.common.Metrics;
+import org.apache.impala.common.PrintUtils;
 import org.apache.impala.compat.MetastoreShim;
 import org.apache.impala.service.CatalogOpExecutor;
 import org.apache.impala.thrift.TEventProcessorMetrics;
@@ -250,6 +252,8 @@ public class MetastoreEventsProcessor implements ExternalEventsProcessor {
   // number of batch events generated
   public static final String NUMBER_OF_BATCH_EVENTS = "batch-events-created";
 
+  private static final long SECOND_IN_NANOS = 1000 * 1000 * 1000L;
+
   /**
    * Wrapper around {@link
    * MetastoreEventsProcessor#getNextMetastoreEventsInBatches(CatalogServiceCatalog,
@@ -489,6 +493,15 @@ public class MetastoreEventsProcessor implements ExternalEventsProcessor {
 
   // keeps track of the last event id which we have synced to
   private final AtomicLong lastSyncedEventId_ = new AtomicLong(-1);
+  private final AtomicLong lastSyncedEventTimeMs_ = new AtomicLong(0);
+
+  // The event id and eventTime of the latest event in HMS. Only used in metrics to show
+  // how far we are lagging behind.
+  private final AtomicLong latestEventId_ = new AtomicLong(-1);
+  private final AtomicLong latestEventTimeMs_ = new AtomicLong(0);
+
+  // The duration in nanoseconds of the processing of the last event batch.
+  private final AtomicLong lastEventProcessDurationNs_ = new AtomicLong(0);
 
   // polling interval in seconds. Note this is a time we wait AFTER each fetch call
   private final long pollingFrequencyInSec_;
@@ -645,6 +658,10 @@ public class MetastoreEventsProcessor implements ExternalEventsProcessor {
         pollingFrequencyInSec_));
     scheduler_.scheduleWithFixedDelay(this::processEvents, pollingFrequencyInSec_,
         pollingFrequencyInSec_, TimeUnit.SECONDS);
+    // Update latestEventId in another thread in case that the processEvents() thread is
+    // blocked by slow metadata reloading or waiting for table locks.
+    scheduler_.scheduleWithFixedDelay(this::updateLatestEventId, pollingFrequencyInSec_,
+        pollingFrequencyInSec_, TimeUnit.SECONDS);
   }
 
   /**
@@ -849,6 +866,40 @@ public class MetastoreEventsProcessor implements ExternalEventsProcessor {
     }
   }
 
+  /**
+   * Update the latest event id regularly so we know how far we are lagging behind.
+   */
+  private void updateLatestEventId() {
+    EventProcessorStatus currentStatus = eventProcessorStatus_;
+    if (currentStatus != EventProcessorStatus.ACTIVE) {
+      return;
+    }
+    try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+      CurrentNotificationEventId currentNotificationEventId =
+          msClient.getHiveClient().getCurrentNotificationEventId();
+      long currentEventId = currentNotificationEventId.getEventId();
+      // no new events since we last polled
+      if (currentEventId <= latestEventId_.get()) {
+        return;
+      }
+      // Fetch the last event to get its eventTime.
+      NotificationEventRequest eventRequest = new NotificationEventRequest();
+      eventRequest.setLastEvent(currentEventId - 1);
+      eventRequest.setMaxEvents(1);
+      NotificationEventResponse response = MetastoreShim
+          .getNextNotification(msClient.getHiveClient(), eventRequest);
+      NotificationEvent event = response.getEventsIterator().next();
+      Preconditions.checkState(event.getEventId() == currentEventId);
+      LOG.info("Latest event in HMS: id={}, time={}", currentEventId,
+          event.getEventTime());
+      latestEventId_.set(currentEventId);
+      latestEventTimeMs_.set(event.getEventTime());
+    } catch (Exception e) {
+      LOG.error("Unable to update current notification event id. Last value: {}",
+          latestEventId_, e);
+    }
+  }
+
   /**
    * Gets the current event processor metrics along with its status. If the status is
    * not active the metrics are skipped. Only the status is sent
@@ -860,25 +911,48 @@ public class MetastoreEventsProcessor implements ExternalEventsProcessor {
     eventProcessorMetrics.setStatus(currentStatus.toString());
     eventProcessorMetrics.setLast_synced_event_id(getLastSyncedEventId());
     if (currentStatus != EventProcessorStatus.ACTIVE) return eventProcessorMetrics;
+    // The following counters are only updated when event-processor is active.
+    eventProcessorMetrics.setLast_synced_event_time(lastSyncedEventTimeMs_.get());
+    eventProcessorMetrics.setLatest_event_id(latestEventId_.get());
+    eventProcessorMetrics.setLatest_event_time(latestEventTimeMs_.get());
 
     long eventsReceived = metrics_.getMeter(EVENTS_RECEIVED_METRIC).getCount();
     long eventsSkipped = metrics_.getCounter(EVENTS_SKIPPED_METRIC).getCount();
-    double avgFetchDuration =
-        metrics_.getTimer(EVENTS_FETCH_DURATION_METRIC).getMeanRate();
-    double avgProcessDuration =
-        metrics_.getTimer(EVENTS_PROCESS_DURATION_METRIC).getMeanRate();
+    eventProcessorMetrics.setEvents_received(eventsReceived);
+    eventProcessorMetrics.setEvents_skipped(eventsSkipped);
+
+    Snapshot fetchDuration =
+        metrics_.getTimer(EVENTS_FETCH_DURATION_METRIC).getSnapshot();
+    double avgFetchDuration = fetchDuration.getMean() / SECOND_IN_NANOS;
+    double p75FetchDuration = fetchDuration.get75thPercentile() / SECOND_IN_NANOS;
+    double p95FetchDuration = fetchDuration.get95thPercentile() / SECOND_IN_NANOS;
+    double p99FetchDuration = fetchDuration.get99thPercentile() / SECOND_IN_NANOS;
+    eventProcessorMetrics.setEvents_fetch_duration_mean(avgFetchDuration);
+    eventProcessorMetrics.setEvents_fetch_duration_p75(p75FetchDuration);
+    eventProcessorMetrics.setEvents_fetch_duration_p95(p95FetchDuration);
+    eventProcessorMetrics.setEvents_fetch_duration_p99(p99FetchDuration);
+
+    Snapshot processDuration =
+        metrics_.getTimer(EVENTS_PROCESS_DURATION_METRIC).getSnapshot();
+    double avgProcessDuration = processDuration.getMean() / SECOND_IN_NANOS;
+    double p75ProcessDuration = processDuration.get75thPercentile() / SECOND_IN_NANOS;
+    double p95ProcessDuration = processDuration.get95thPercentile() / SECOND_IN_NANOS;
+    double p99ProcessDuration = processDuration.get99thPercentile() / SECOND_IN_NANOS;
+    eventProcessorMetrics.setEvents_process_duration_mean(avgProcessDuration);
+    eventProcessorMetrics.setEvents_process_duration_p75(p75ProcessDuration);
+    eventProcessorMetrics.setEvents_process_duration_p95(p95ProcessDuration);
+    eventProcessorMetrics.setEvents_process_duration_p99(p99ProcessDuration);
+
+    double lastProcessDuration = lastEventProcessDurationNs_.get() /
+        (double) SECOND_IN_NANOS;
+    eventProcessorMetrics.setLast_events_process_duration(lastProcessDuration);
+
     double avgNumberOfEventsReceived1Min =
         metrics_.getMeter(EVENTS_RECEIVED_METRIC).getOneMinuteRate();
     double avgNumberOfEventsReceived5Min =
         metrics_.getMeter(EVENTS_RECEIVED_METRIC).getFiveMinuteRate();
     double avgNumberOfEventsReceived15Min =
         metrics_.getMeter(EVENTS_RECEIVED_METRIC).getFifteenMinuteRate();
-
-
-    eventProcessorMetrics.setEvents_received(eventsReceived);
-    eventProcessorMetrics.setEvents_skipped(eventsSkipped);
-    eventProcessorMetrics.setEvents_fetch_duration_mean(avgFetchDuration);
-    eventProcessorMetrics.setEvents_process_duration_mean(avgProcessDuration);
     eventProcessorMetrics.setEvents_received_1min_rate(avgNumberOfEventsReceived1Min);
     eventProcessorMetrics.setEvents_received_5min_rate(avgNumberOfEventsReceived5Min);
     eventProcessorMetrics.setEvents_received_15min_rate(avgNumberOfEventsReceived15Min);
@@ -935,6 +1009,7 @@ public class MetastoreEventsProcessor implements ExternalEventsProcessor {
           event.processIfEnabled();
           deleteEventLog_.garbageCollect(event.getEventId());
           lastSyncedEventId_.set(event.getEventId());
+          lastSyncedEventTimeMs_.set(event.getEventTime());
         }
       }
     } catch (CatalogException e) {
@@ -942,7 +1017,10 @@ public class MetastoreEventsProcessor implements ExternalEventsProcessor {
           "Unable to process event %d of type %s. Event processing will be stopped.",
           lastProcessedEvent.getEventId(), lastProcessedEvent.getEventType()), e);
     } finally {
-      context.stop();
+      long elapsed_ns = context.stop();
+      lastEventProcessDurationNs_.set(elapsed_ns);
+      LOG.info("Time elapsed in processing event batch: {}",
+          PrintUtils.printTimeNs(elapsed_ns));
     }
   }
 


[impala] 02/02: IMPALA-10610: Support multiple file formats in a single Iceberg Table

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f598b2ad68f59c2deda3d8cdf0974d8e9a55111f
Author: Gergely Fürnstáhl <gf...@cloudera.com>
AuthorDate: Thu Sep 1 16:27:16 2022 +0200

    IMPALA-10610: Support multiple file formats in a single Iceberg Table
    
    Added support for multiple file formats. Previously Impala created a
    Scanner class based on the partition's file format; now, in the case of
    an Iceberg table, it reads the file format from the file-level metadata
    instead.
    
    IcebergScanNode will aggregate file formats as well instead of relying
    on partitions, so it can be used for planning.
    
    Testing:
    
    Created a mixed file format table with hive and added a test for it.
    
    Change-Id: Ifc816595724e8fd2c885c6664f790af61ddf5c07
    Reviewed-on: http://gerrit.cloudera.org:8080/18935
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/hdfs-scan-node-base.cc                 |  93 +++++++----
 .../org/apache/impala/planner/HdfsScanNode.java    |   4 +-
 .../org/apache/impala/planner/IcebergScanNode.java |  28 +++-
 testdata/data/README                               |  12 ++
 ...0b6136a-job_16619542960420_0002-1-00001.parquet | Bin 0 -> 872 bytes
 ...e500a19c1d1-job_16619542960420_0003-1-00001.orc | Bin 0 -> 437 bytes
 ...80faff0-job_16619542960420_0004-1-00001.parquet | Bin 0 -> 872 bytes
 ...19c5500ed04-job_16619542960420_0004-1-00001.orc | Bin 0 -> 435 bytes
 .../055baf62-de6d-4583-bf21-f187f9482343-m0.avro   | Bin 0 -> 3297 bytes
 .../871d1473-8566-46c0-a530-a2256b3f396f-m0.avro   | Bin 0 -> 3297 bytes
 .../a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro   | Bin 0 -> 3304 bytes
 .../d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro   | Bin 0 -> 3305 bytes
 ...135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro | Bin 0 -> 1989 bytes
 ...859-1-055baf62-de6d-4583-bf21-f187f9482343.avro | Bin 0 -> 2379 bytes
 ...254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro | Bin 0 -> 2249 bytes
 ...572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro | Bin 0 -> 2119 bytes
 .../metadata/v1.metadata.json                      |  70 ++++++++
 .../metadata/v2.metadata.json                      |  94 +++++++++++
 .../metadata/v3.metadata.json                      | 100 ++++++++++++
 .../metadata/v4.metadata.json                      | 125 ++++++++++++++
 .../metadata/v5.metadata.json                      | 128 +++++++++++++++
 .../metadata/v6.metadata.json                      | 153 +++++++++++++++++
 .../metadata/v7.metadata.json                      | 156 ++++++++++++++++++
 .../metadata/v8.metadata.json                      | 181 +++++++++++++++++++++
 .../metadata/version-hint.txt                      |   1 +
 .../QueryTest/iceberg-mixed-file-format.test       |  11 ++
 tests/query_test/test_iceberg.py                   |   5 +
 27 files changed, 1123 insertions(+), 38 deletions(-)

diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc
index 75d2aa5d9..6ecc17dd6 100644
--- a/be/src/exec/hdfs-scan-node-base.cc
+++ b/be/src/exec/hdfs-scan-node-base.cc
@@ -856,43 +856,66 @@ const CodegenFnPtrBase* HdfsScanNodeBase::GetCodegenFn(THdfsFileFormat::type typ
 
 Status HdfsScanNodeBase::CreateAndOpenScannerHelper(HdfsPartitionDescriptor* partition,
     ScannerContext* context, scoped_ptr<HdfsScanner>* scanner) {
+  using namespace org::apache::impala::fb;
   DCHECK(context != nullptr);
   DCHECK(scanner->get() == nullptr);
-  THdfsCompression::type compression =
-      context->GetStream()->file_desc()->file_compression;
-
-  // Create a new scanner for this file format and compression.
-  switch (partition->file_format()) {
-    case THdfsFileFormat::TEXT:
-      if (HdfsTextScanner::HasBuiltinSupport(compression)) {
-        scanner->reset(new HdfsTextScanner(this, runtime_state_));
-      } else {
-        // No builtin support - we must have loaded the plugin in IssueInitialRanges().
-        auto it = _THdfsCompression_VALUES_TO_NAMES.find(compression);
-        DCHECK(it != _THdfsCompression_VALUES_TO_NAMES.end())
-            << "Already issued ranges for this compression type.";
-        scanner->reset(HdfsPluginTextScanner::GetHdfsPluginTextScanner(
-            this, runtime_state_, it->second));
-      }
-      break;
-    case THdfsFileFormat::SEQUENCE_FILE:
-      scanner->reset(new HdfsSequenceScanner(this, runtime_state_));
-      break;
-    case THdfsFileFormat::RC_FILE:
-      scanner->reset(new HdfsRCFileScanner(this, runtime_state_));
-      break;
-    case THdfsFileFormat::AVRO:
-      scanner->reset(new HdfsAvroScanner(this, runtime_state_));
-      break;
-    case THdfsFileFormat::PARQUET:
-      scanner->reset(new HdfsParquetScanner(this, runtime_state_));
-      break;
-    case THdfsFileFormat::ORC:
-      scanner->reset(new HdfsOrcScanner(this, runtime_state_));
-      break;
-    default:
-      return Status(Substitute("Unknown Hdfs file format type: $0",
-          partition->file_format()));
+
+  const FbFileMetadata* file_metadata = context->GetStream(0)->file_desc()->file_metadata;
+  if (file_metadata) {
+    // Iceberg tables can have different file format for each data file:
+    const FbIcebergMetadata* ice_metadata = file_metadata->iceberg_metadata();
+    DCHECK(ice_metadata != nullptr);
+    switch (ice_metadata->file_format()) {
+      case FbIcebergDataFileFormat::FbIcebergDataFileFormat_PARQUET:
+        scanner->reset(new HdfsParquetScanner(this, runtime_state_));
+        break;
+      case FbIcebergDataFileFormat::FbIcebergDataFileFormat_ORC:
+        scanner->reset(new HdfsOrcScanner(this, runtime_state_));
+        break;
+      case FbIcebergDataFileFormat::FbIcebergDataFileFormat_AVRO:
+        scanner->reset(new HdfsAvroScanner(this, runtime_state_));
+        break;
+      default:
+        return Status(Substitute(
+            "Unknown Iceberg file format type: $0", ice_metadata->file_format()));
+    }
+  } else {
+    THdfsCompression::type compression =
+        context->GetStream()->file_desc()->file_compression;
+
+    // Create a new scanner for this file format and compression.
+    switch (partition->file_format()) {
+      case THdfsFileFormat::TEXT:
+        if (HdfsTextScanner::HasBuiltinSupport(compression)) {
+          scanner->reset(new HdfsTextScanner(this, runtime_state_));
+        } else {
+          // No builtin support - we must have loaded the plugin in IssueInitialRanges().
+          auto it = _THdfsCompression_VALUES_TO_NAMES.find(compression);
+          DCHECK(it != _THdfsCompression_VALUES_TO_NAMES.end())
+              << "Already issued ranges for this compression type.";
+          scanner->reset(HdfsPluginTextScanner::GetHdfsPluginTextScanner(
+              this, runtime_state_, it->second));
+        }
+        break;
+      case THdfsFileFormat::SEQUENCE_FILE:
+        scanner->reset(new HdfsSequenceScanner(this, runtime_state_));
+        break;
+      case THdfsFileFormat::RC_FILE:
+        scanner->reset(new HdfsRCFileScanner(this, runtime_state_));
+        break;
+      case THdfsFileFormat::AVRO:
+        scanner->reset(new HdfsAvroScanner(this, runtime_state_));
+        break;
+      case THdfsFileFormat::PARQUET:
+        scanner->reset(new HdfsParquetScanner(this, runtime_state_));
+        break;
+      case THdfsFileFormat::ORC:
+        scanner->reset(new HdfsOrcScanner(this, runtime_state_));
+        break;
+      default:
+        return Status(
+            Substitute("Unknown Hdfs file format type: $0", partition->file_format()));
+    }
   }
   DCHECK(scanner->get() != nullptr);
   RETURN_IF_ERROR(scanner->get()->Open(context));
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index a3c8de51d..8c236d0f3 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -232,7 +232,7 @@ public class HdfsScanNode extends ScanNode {
   private Map<FileSystemUtil.FsType, Long> totalBytesPerFsEC_ = new TreeMap<>();
 
   // File formats scanned. Set in computeScanRangeLocations().
-  private Set<HdfsFileFormat> fileFormats_;
+  protected Set<HdfsFileFormat> fileFormats_;
 
   // Whether all formats scanned are Parquet. Set in computeScanRangeLocations().
   private boolean allParquet_ = false;
@@ -363,6 +363,7 @@ public class HdfsScanNode extends ScanNode {
       throw new IllegalStateException(error.toString());
     }
     isPartitionKeyScan_ = isPartitionKeyScan;
+    fileFormats_ = new HashSet<>();
   }
 
   /**
@@ -1168,7 +1169,6 @@ public class HdfsScanNode extends ScanNode {
     totalBytesPerFsEC_ = new TreeMap<>();
     largestScanRangeBytes_ = 0;
     maxScanRangeNumRows_ = -1;
-    fileFormats_ = new HashSet<>();
     boolean allParquet = (partitions_.size() > 0) ? true : false;
     long simpleLimitNumRows = 0; // only used for the simple limit case
     boolean isSimpleLimit = sampleParams_ == null &&
diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
index 5085e48fb..4532a8fe0 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
@@ -28,7 +28,11 @@ import org.apache.impala.catalog.FeCatalogUtils;
 import org.apache.impala.catalog.FeFsPartition;
 import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.FeIcebergTable;
+import org.apache.impala.catalog.HdfsFileFormat;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.ImpalaRuntimeException;
+import org.apache.impala.fb.FbIcebergDataFileFormat;
 
 import com.google.common.base.Preconditions;
 
@@ -40,13 +44,35 @@ public class IcebergScanNode extends HdfsScanNode {
   private List<FileDescriptor> fileDescs_;
 
   public IcebergScanNode(PlanNodeId id, TableRef tblRef, List<Expr> conjuncts,
-      MultiAggregateInfo aggInfo, List<FileDescriptor> fileDescs) {
+      MultiAggregateInfo aggInfo, List<FileDescriptor> fileDescs)
+      throws ImpalaRuntimeException {
     super(id, tblRef.getDesc(), conjuncts,
         getIcebergPartition(((FeIcebergTable)tblRef.getTable()).getFeFsTable()), tblRef,
         aggInfo, null, false);
     // Hdfs table transformed from iceberg table only has one partition
     Preconditions.checkState(partitions_.size() == 1);
+
     fileDescs_ = fileDescs;
+
+    boolean hasParquet = false;
+    boolean hasOrc = false;
+    boolean hasAvro = false;
+    for (FileDescriptor fileDesc : fileDescs_) {
+      byte fileFormat = fileDesc.getFbFileMetadata().icebergMetadata().fileFormat();
+      if (fileFormat == FbIcebergDataFileFormat.PARQUET) {
+        hasParquet = true;
+      } else if (fileFormat == FbIcebergDataFileFormat.ORC) {
+        hasOrc = true;
+      } else if (fileFormat == FbIcebergDataFileFormat.AVRO) {
+        hasAvro = true;
+      } else {
+        throw new ImpalaRuntimeException(String.format(
+            "Invalid Iceberg file format of file: %s", fileDesc.getAbsolutePath()));
+      }
+    }
+    if (hasParquet) fileFormats_.add(HdfsFileFormat.PARQUET);
+    if (hasOrc) fileFormats_.add(HdfsFileFormat.ORC);
+    if (hasAvro) fileFormats_.add(HdfsFileFormat.AVRO);
   }
 
   /**
diff --git a/testdata/data/README b/testdata/data/README
index 3f02fc9be..329ab0938 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -896,3 +896,15 @@ Step 4, update the table property 'write.data.path' to '/test-warehouse/iceberg_
 2,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09
 2,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09
 0,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09
+
+iceberg_test/iceberg_mixed_file_format_test
+Generated by Hive
+create table iceberg_mixed_file_format_test (i int, s string, d double) stored by iceberg;
+insert into iceberg_mixed_file_format_test values (1, "A", 0.5);
+alter table iceberg_mixed_file_format_test set tblproperties("write.format.default"="orc");
+insert into iceberg_mixed_file_format_test values (2, "B", 1.5);
+alter table iceberg_mixed_file_format_test set tblproperties("write.format.default"="parquet");
+insert into iceberg_mixed_file_format_test values (3, "C", 2.5);
+alter table iceberg_mixed_file_format_test set tblproperties("write.format.default"="orc");
+insert into iceberg_mixed_file_format_test values (4, "D", 3.5);
+Converted similarly to iceberg_v2_no_deletes
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet
new file mode 100644
index 000000000..e6ff25697
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-ee500a19c1d1-job_16619542960420_0003-1-00001.orc b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-ee500a19c1d1-job_16619542960420_0003-1-00001.orc
new file mode 100644
index 000000000..aa00a17c5
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-ee500a19c1d1-job_16619542960420_0003-1-00001.orc differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-27ff880faff0-job_16619542960420_0004-1-00001.parquet b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-27ff880faff0-job_16619542960420_0004-1-00001.parquet
new file mode 100644
index 000000000..34e2f2d84
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-27ff880faff0-job_16619542960420_0004-1-00001.parquet differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-519c5500ed04-job_16619542960420_0004-1-00001.orc b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-519c5500ed04-job_16619542960420_0004-1-00001.orc
new file mode 100644
index 000000000..b97410635
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-519c5500ed04-job_16619542960420_0004-1-00001.orc differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/055baf62-de6d-4583-bf21-f187f9482343-m0.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/055baf62-de6d-4583-bf21-f187f9482343-m0.avro
new file mode 100644
index 000000000..ad8b04136
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/055baf62-de6d-4583-bf21-f187f9482343-m0.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/871d1473-8566-46c0-a530-a2256b3f396f-m0.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/871d1473-8566-46c0-a530-a2256b3f396f-m0.avro
new file mode 100644
index 000000000..a6a5806a2
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/871d1473-8566-46c0-a530-a2256b3f396f-m0.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro
new file mode 100644
index 000000000..1219c1936
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro
new file mode 100644
index 000000000..0d7c4fc73
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro
new file mode 100644
index 000000000..7fff53ff9
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro
new file mode 100644
index 000000000..559bc89da
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro
new file mode 100644
index 000000000..12e652230
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro
new file mode 100644
index 000000000..53bea52c9
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v1.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v1.metadata.json
new file mode 100644
index 000000000..5f8fd441d
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v1.metadata.json
@@ -0,0 +1,70 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662456462742,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "bucketing_version" : "2",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : -1,
+  "snapshots" : [ ],
+  "snapshot-log" : [ ],
+  "metadata-log" : [ ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v2.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v2.metadata.json
new file mode 100644
index 000000000..f74188ff5
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v2.metadata.json
@@ -0,0 +1,94 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662456653023,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "bucketing_version" : "2",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 1847986799759674135,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v3.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v3.metadata.json
new file mode 100644
index 000000000..2972f1ed9
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v3.metadata.json
@@ -0,0 +1,100 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662456709188,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662456709",
+    "write.format.default" : "orc",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 1847986799759674135,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v4.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v4.metadata.json
new file mode 100644
index 000000000..c7af02a60
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v4.metadata.json
@@ -0,0 +1,125 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662456781190,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662456709",
+    "write.format.default" : "orc",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 6603964444137547572,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v5.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v5.metadata.json
new file mode 100644
index 000000000..2fcd6ec89
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v5.metadata.json
@@ -0,0 +1,128 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662457702185,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662457702",
+    "write.format.default" : "parquet",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 6603964444137547572,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00003-7391ea34-55d6-4474-9e43-279e0f5e94d2.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v6.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v6.metadata.json
new file mode 100644
index 000000000..0efe82d5d
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v6.metadata.json
@@ -0,0 +1,153 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662457721334,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662457702",
+    "write.format.default" : "parquet",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 5589333083859779254,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 5589333083859779254,
+    "parent-snapshot-id" : 6603964444137547572,
+    "timestamp-ms" : 1662457721334,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "3",
+      "total-files-size" : "2181",
+      "total-data-files" : "3",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "snapshot-id" : 5589333083859779254
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00003-7391ea34-55d6-4474-9e43-279e0f5e94d2.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457702185,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00004-bdcf60af-58a5-45ee-8301-1192e27e955a.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v7.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v7.metadata.json
new file mode 100644
index 000000000..ceb351e90
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v7.metadata.json
@@ -0,0 +1,156 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662457725652,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662457725",
+    "write.format.default" : "orc",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 5589333083859779254,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 5589333083859779254,
+    "parent-snapshot-id" : 6603964444137547572,
+    "timestamp-ms" : 1662457721334,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "3",
+      "total-files-size" : "2181",
+      "total-data-files" : "3",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "snapshot-id" : 5589333083859779254
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00003-7391ea34-55d6-4474-9e43-279e0f5e94d2.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457702185,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00004-bdcf60af-58a5-45ee-8301-1192e27e955a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00005-286bb9d4-6ec5-4e12-8a27-ac7588da4b86.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v8.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v8.metadata.json
new file mode 100644
index 000000000..098682a91
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v8.metadata.json
@@ -0,0 +1,181 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662457745425,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662457725",
+    "write.format.default" : "orc",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 5574591442446832859,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 5589333083859779254,
+    "parent-snapshot-id" : 6603964444137547572,
+    "timestamp-ms" : 1662457721334,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "3",
+      "total-files-size" : "2181",
+      "total-data-files" : "3",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 5574591442446832859,
+    "parent-snapshot-id" : 5589333083859779254,
+    "timestamp-ms" : 1662457745425,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "435",
+      "changed-partition-count" : "1",
+      "total-records" : "4",
+      "total-files-size" : "2616",
+      "total-data-files" : "4",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "snapshot-id" : 5589333083859779254
+  }, {
+    "timestamp-ms" : 1662457745425,
+    "snapshot-id" : 5574591442446832859
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00003-7391ea34-55d6-4474-9e43-279e0f5e94d2.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457702185,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00004-bdcf60af-58a5-45ee-8301-1192e27e955a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00005-286bb9d4-6ec5-4e12-8a27-ac7588da4b86.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457725652,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00006-29be9914-209f-4c92-8b91-d87029bd1ed4.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/version-hint.txt b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/version-hint.txt
new file mode 100644
index 000000000..45a4fb75d
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/version-hint.txt
@@ -0,0 +1 @@
+8
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test
new file mode 100644
index 000000000..03b19b031
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test
@@ -0,0 +1,11 @@
+====
+---- QUERY
+select * from iceberg_mixed_file_format_test;
+---- RESULTS
+2,'B',1.5
+3,'C',2.5
+4,'D',3.5
+1,'A',0.5
+---- TYPES
+INT, STRING, DOUBLE
+====
\ No newline at end of file
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 4dc20f94e..b8ab0cef1 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -784,6 +784,11 @@ class TestIcebergTable(IcebergTestSuite):
     self.run_test_case('QueryTest/iceberg-multiple-storage-locations-table',
                        vector, unique_database)
 
+  def test_mixed_file_format(self, vector, unique_database):
+    create_iceberg_table_from_directory(self.client, unique_database,
+                                        "iceberg_mixed_file_format_test", "parquet")
+    self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
+                      unique_database)
 
 class TestIcebergV2Table(IcebergTestSuite):
   """Tests related to Iceberg V2 tables."""