You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2022/09/08 22:11:29 UTC

[impala] 02/02: IMPALA-10610: Support multiple file formats in a single Iceberg Table

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f598b2ad68f59c2deda3d8cdf0974d8e9a55111f
Author: Gergely Fürnstáhl <gf...@cloudera.com>
AuthorDate: Thu Sep 1 16:27:16 2022 +0200

    IMPALA-10610: Support multiple file formats in a single Iceberg Table
    
    Added support for multiple file formats. Previously Impala created a
    Scanner class based on the partition's file format; now, in the case of
    an Iceberg table, it reads the file format from the file-level
    metadata instead.
    
    IcebergScanNode will aggregate file formats as well instead of relying
    on partitions, so it can be used for planning.
    
    Testing:
    
    Created a mixed file format table with Hive and added a test for it.
    
    Change-Id: Ifc816595724e8fd2c885c6664f790af61ddf5c07
    Reviewed-on: http://gerrit.cloudera.org:8080/18935
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/hdfs-scan-node-base.cc                 |  93 +++++++----
 .../org/apache/impala/planner/HdfsScanNode.java    |   4 +-
 .../org/apache/impala/planner/IcebergScanNode.java |  28 +++-
 testdata/data/README                               |  12 ++
 ...0b6136a-job_16619542960420_0002-1-00001.parquet | Bin 0 -> 872 bytes
 ...e500a19c1d1-job_16619542960420_0003-1-00001.orc | Bin 0 -> 437 bytes
 ...80faff0-job_16619542960420_0004-1-00001.parquet | Bin 0 -> 872 bytes
 ...19c5500ed04-job_16619542960420_0004-1-00001.orc | Bin 0 -> 435 bytes
 .../055baf62-de6d-4583-bf21-f187f9482343-m0.avro   | Bin 0 -> 3297 bytes
 .../871d1473-8566-46c0-a530-a2256b3f396f-m0.avro   | Bin 0 -> 3297 bytes
 .../a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro   | Bin 0 -> 3304 bytes
 .../d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro   | Bin 0 -> 3305 bytes
 ...135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro | Bin 0 -> 1989 bytes
 ...859-1-055baf62-de6d-4583-bf21-f187f9482343.avro | Bin 0 -> 2379 bytes
 ...254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro | Bin 0 -> 2249 bytes
 ...572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro | Bin 0 -> 2119 bytes
 .../metadata/v1.metadata.json                      |  70 ++++++++
 .../metadata/v2.metadata.json                      |  94 +++++++++++
 .../metadata/v3.metadata.json                      | 100 ++++++++++++
 .../metadata/v4.metadata.json                      | 125 ++++++++++++++
 .../metadata/v5.metadata.json                      | 128 +++++++++++++++
 .../metadata/v6.metadata.json                      | 153 +++++++++++++++++
 .../metadata/v7.metadata.json                      | 156 ++++++++++++++++++
 .../metadata/v8.metadata.json                      | 181 +++++++++++++++++++++
 .../metadata/version-hint.txt                      |   1 +
 .../QueryTest/iceberg-mixed-file-format.test       |  11 ++
 tests/query_test/test_iceberg.py                   |   5 +
 27 files changed, 1123 insertions(+), 38 deletions(-)

diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc
index 75d2aa5d9..6ecc17dd6 100644
--- a/be/src/exec/hdfs-scan-node-base.cc
+++ b/be/src/exec/hdfs-scan-node-base.cc
@@ -856,43 +856,66 @@ const CodegenFnPtrBase* HdfsScanNodeBase::GetCodegenFn(THdfsFileFormat::type typ
 
 Status HdfsScanNodeBase::CreateAndOpenScannerHelper(HdfsPartitionDescriptor* partition,
     ScannerContext* context, scoped_ptr<HdfsScanner>* scanner) {
+  using namespace org::apache::impala::fb;
   DCHECK(context != nullptr);
   DCHECK(scanner->get() == nullptr);
-  THdfsCompression::type compression =
-      context->GetStream()->file_desc()->file_compression;
-
-  // Create a new scanner for this file format and compression.
-  switch (partition->file_format()) {
-    case THdfsFileFormat::TEXT:
-      if (HdfsTextScanner::HasBuiltinSupport(compression)) {
-        scanner->reset(new HdfsTextScanner(this, runtime_state_));
-      } else {
-        // No builtin support - we must have loaded the plugin in IssueInitialRanges().
-        auto it = _THdfsCompression_VALUES_TO_NAMES.find(compression);
-        DCHECK(it != _THdfsCompression_VALUES_TO_NAMES.end())
-            << "Already issued ranges for this compression type.";
-        scanner->reset(HdfsPluginTextScanner::GetHdfsPluginTextScanner(
-            this, runtime_state_, it->second));
-      }
-      break;
-    case THdfsFileFormat::SEQUENCE_FILE:
-      scanner->reset(new HdfsSequenceScanner(this, runtime_state_));
-      break;
-    case THdfsFileFormat::RC_FILE:
-      scanner->reset(new HdfsRCFileScanner(this, runtime_state_));
-      break;
-    case THdfsFileFormat::AVRO:
-      scanner->reset(new HdfsAvroScanner(this, runtime_state_));
-      break;
-    case THdfsFileFormat::PARQUET:
-      scanner->reset(new HdfsParquetScanner(this, runtime_state_));
-      break;
-    case THdfsFileFormat::ORC:
-      scanner->reset(new HdfsOrcScanner(this, runtime_state_));
-      break;
-    default:
-      return Status(Substitute("Unknown Hdfs file format type: $0",
-          partition->file_format()));
+
+  const FbFileMetadata* file_metadata = context->GetStream(0)->file_desc()->file_metadata;
+  if (file_metadata) {
+    // Iceberg tables can have different file format for each data file:
+    const FbIcebergMetadata* ice_metadata = file_metadata->iceberg_metadata();
+    DCHECK(ice_metadata != nullptr);
+    switch (ice_metadata->file_format()) {
+      case FbIcebergDataFileFormat::FbIcebergDataFileFormat_PARQUET:
+        scanner->reset(new HdfsParquetScanner(this, runtime_state_));
+        break;
+      case FbIcebergDataFileFormat::FbIcebergDataFileFormat_ORC:
+        scanner->reset(new HdfsOrcScanner(this, runtime_state_));
+        break;
+      case FbIcebergDataFileFormat::FbIcebergDataFileFormat_AVRO:
+        scanner->reset(new HdfsAvroScanner(this, runtime_state_));
+        break;
+      default:
+        return Status(Substitute(
+            "Unknown Iceberg file format type: $0", ice_metadata->file_format()));
+    }
+  } else {
+    THdfsCompression::type compression =
+        context->GetStream()->file_desc()->file_compression;
+
+    // Create a new scanner for this file format and compression.
+    switch (partition->file_format()) {
+      case THdfsFileFormat::TEXT:
+        if (HdfsTextScanner::HasBuiltinSupport(compression)) {
+          scanner->reset(new HdfsTextScanner(this, runtime_state_));
+        } else {
+          // No builtin support - we must have loaded the plugin in IssueInitialRanges().
+          auto it = _THdfsCompression_VALUES_TO_NAMES.find(compression);
+          DCHECK(it != _THdfsCompression_VALUES_TO_NAMES.end())
+              << "Already issued ranges for this compression type.";
+          scanner->reset(HdfsPluginTextScanner::GetHdfsPluginTextScanner(
+              this, runtime_state_, it->second));
+        }
+        break;
+      case THdfsFileFormat::SEQUENCE_FILE:
+        scanner->reset(new HdfsSequenceScanner(this, runtime_state_));
+        break;
+      case THdfsFileFormat::RC_FILE:
+        scanner->reset(new HdfsRCFileScanner(this, runtime_state_));
+        break;
+      case THdfsFileFormat::AVRO:
+        scanner->reset(new HdfsAvroScanner(this, runtime_state_));
+        break;
+      case THdfsFileFormat::PARQUET:
+        scanner->reset(new HdfsParquetScanner(this, runtime_state_));
+        break;
+      case THdfsFileFormat::ORC:
+        scanner->reset(new HdfsOrcScanner(this, runtime_state_));
+        break;
+      default:
+        return Status(
+            Substitute("Unknown Hdfs file format type: $0", partition->file_format()));
+    }
   }
   DCHECK(scanner->get() != nullptr);
   RETURN_IF_ERROR(scanner->get()->Open(context));
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index a3c8de51d..8c236d0f3 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -232,7 +232,7 @@ public class HdfsScanNode extends ScanNode {
   private Map<FileSystemUtil.FsType, Long> totalBytesPerFsEC_ = new TreeMap<>();
 
   // File formats scanned. Set in computeScanRangeLocations().
-  private Set<HdfsFileFormat> fileFormats_;
+  protected Set<HdfsFileFormat> fileFormats_;
 
   // Whether all formats scanned are Parquet. Set in computeScanRangeLocations().
   private boolean allParquet_ = false;
@@ -363,6 +363,7 @@ public class HdfsScanNode extends ScanNode {
       throw new IllegalStateException(error.toString());
     }
     isPartitionKeyScan_ = isPartitionKeyScan;
+    fileFormats_ = new HashSet<>();
   }
 
   /**
@@ -1168,7 +1169,6 @@ public class HdfsScanNode extends ScanNode {
     totalBytesPerFsEC_ = new TreeMap<>();
     largestScanRangeBytes_ = 0;
     maxScanRangeNumRows_ = -1;
-    fileFormats_ = new HashSet<>();
     boolean allParquet = (partitions_.size() > 0) ? true : false;
     long simpleLimitNumRows = 0; // only used for the simple limit case
     boolean isSimpleLimit = sampleParams_ == null &&
diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
index 5085e48fb..4532a8fe0 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
@@ -28,7 +28,11 @@ import org.apache.impala.catalog.FeCatalogUtils;
 import org.apache.impala.catalog.FeFsPartition;
 import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.FeIcebergTable;
+import org.apache.impala.catalog.HdfsFileFormat;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.ImpalaRuntimeException;
+import org.apache.impala.fb.FbIcebergDataFileFormat;
 
 import com.google.common.base.Preconditions;
 
@@ -40,13 +44,35 @@ public class IcebergScanNode extends HdfsScanNode {
   private List<FileDescriptor> fileDescs_;
 
   public IcebergScanNode(PlanNodeId id, TableRef tblRef, List<Expr> conjuncts,
-      MultiAggregateInfo aggInfo, List<FileDescriptor> fileDescs) {
+      MultiAggregateInfo aggInfo, List<FileDescriptor> fileDescs)
+      throws ImpalaRuntimeException {
     super(id, tblRef.getDesc(), conjuncts,
         getIcebergPartition(((FeIcebergTable)tblRef.getTable()).getFeFsTable()), tblRef,
         aggInfo, null, false);
     // Hdfs table transformed from iceberg table only has one partition
     Preconditions.checkState(partitions_.size() == 1);
+
     fileDescs_ = fileDescs;
+
+    boolean hasParquet = false;
+    boolean hasOrc = false;
+    boolean hasAvro = false;
+    for (FileDescriptor fileDesc : fileDescs_) {
+      byte fileFormat = fileDesc.getFbFileMetadata().icebergMetadata().fileFormat();
+      if (fileFormat == FbIcebergDataFileFormat.PARQUET) {
+        hasParquet = true;
+      } else if (fileFormat == FbIcebergDataFileFormat.ORC) {
+        hasOrc = true;
+      } else if (fileFormat == FbIcebergDataFileFormat.AVRO) {
+        hasAvro = true;
+      } else {
+        throw new ImpalaRuntimeException(String.format(
+            "Invalid Iceberg file format of file: %s", fileDesc.getAbsolutePath()));
+      }
+    }
+    if (hasParquet) fileFormats_.add(HdfsFileFormat.PARQUET);
+    if (hasOrc) fileFormats_.add(HdfsFileFormat.ORC);
+    if (hasAvro) fileFormats_.add(HdfsFileFormat.AVRO);
   }
 
   /**
diff --git a/testdata/data/README b/testdata/data/README
index 3f02fc9be..329ab0938 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -896,3 +896,15 @@ Step 4, update the table property 'write.data.path' to '/test-warehouse/iceberg_
 2,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09
 2,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09
 0,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09
+
+iceberg_test/iceberg_mixed_file_format_test
+Generated by Hive
+create table iceberg_mixed_file_format_test (i int, s string, d double) stored by iceberg;
+insert into iceberg_mixed_file_format_test values (1, "A", 0.5);
+alter table iceberg_mixed_file_format_test set tblproperties("write.format.default"="orc");
+insert into iceberg_mixed_file_format_test values (2, "B", 1.5);
+alter table iceberg_mixed_file_format_test set tblproperties("write.format.default"="parquet");
+insert into iceberg_mixed_file_format_test values (3, "C", 2.5);
+alter table iceberg_mixed_file_format_test set tblproperties("write.format.default"="orc");
+insert into iceberg_mixed_file_format_test values (4, "D", 3.5);
+Converted similarly to iceberg_v2_no_deletes
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet
new file mode 100644
index 000000000..e6ff25697
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-fff150b6136a-job_16619542960420_0002-1-00001.parquet differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-ee500a19c1d1-job_16619542960420_0003-1-00001.orc b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-ee500a19c1d1-job_16619542960420_0003-1-00001.orc
new file mode 100644
index 000000000..aa00a17c5
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-ee500a19c1d1-job_16619542960420_0003-1-00001.orc differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-27ff880faff0-job_16619542960420_0004-1-00001.parquet b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-27ff880faff0-job_16619542960420_0004-1-00001.parquet
new file mode 100644
index 000000000..34e2f2d84
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-27ff880faff0-job_16619542960420_0004-1-00001.parquet differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-519c5500ed04-job_16619542960420_0004-1-00001.orc b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-519c5500ed04-job_16619542960420_0004-1-00001.orc
new file mode 100644
index 000000000..b97410635
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-519c5500ed04-job_16619542960420_0004-1-00001.orc differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/055baf62-de6d-4583-bf21-f187f9482343-m0.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/055baf62-de6d-4583-bf21-f187f9482343-m0.avro
new file mode 100644
index 000000000..ad8b04136
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/055baf62-de6d-4583-bf21-f187f9482343-m0.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/871d1473-8566-46c0-a530-a2256b3f396f-m0.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/871d1473-8566-46c0-a530-a2256b3f396f-m0.avro
new file mode 100644
index 000000000..a6a5806a2
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/871d1473-8566-46c0-a530-a2256b3f396f-m0.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro
new file mode 100644
index 000000000..1219c1936
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/a0332d05-4c9c-4640-8656-69ba61b7859a-m0.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro
new file mode 100644
index 000000000..0d7c4fc73
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/d43cc1ea-096f-4594-9583-b1b27f8f0230-m0.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro
new file mode 100644
index 000000000..7fff53ff9
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro
new file mode 100644
index 000000000..559bc89da
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro
new file mode 100644
index 000000000..12e652230
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro
new file mode 100644
index 000000000..53bea52c9
Binary files /dev/null and b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro differ
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v1.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v1.metadata.json
new file mode 100644
index 000000000..5f8fd441d
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v1.metadata.json
@@ -0,0 +1,70 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662456462742,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "bucketing_version" : "2",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : -1,
+  "snapshots" : [ ],
+  "snapshot-log" : [ ],
+  "metadata-log" : [ ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v2.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v2.metadata.json
new file mode 100644
index 000000000..f74188ff5
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v2.metadata.json
@@ -0,0 +1,94 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662456653023,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "bucketing_version" : "2",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 1847986799759674135,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v3.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v3.metadata.json
new file mode 100644
index 000000000..2972f1ed9
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v3.metadata.json
@@ -0,0 +1,100 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662456709188,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662456709",
+    "write.format.default" : "orc",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 1847986799759674135,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v4.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v4.metadata.json
new file mode 100644
index 000000000..c7af02a60
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v4.metadata.json
@@ -0,0 +1,125 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662456781190,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662456709",
+    "write.format.default" : "orc",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 6603964444137547572,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v5.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v5.metadata.json
new file mode 100644
index 000000000..2fcd6ec89
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v5.metadata.json
@@ -0,0 +1,128 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662457702185,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662457702",
+    "write.format.default" : "parquet",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 6603964444137547572,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00003-7391ea34-55d6-4474-9e43-279e0f5e94d2.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v6.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v6.metadata.json
new file mode 100644
index 000000000..0efe82d5d
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v6.metadata.json
@@ -0,0 +1,153 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662457721334,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662457702",
+    "write.format.default" : "parquet",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 5589333083859779254,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 5589333083859779254,
+    "parent-snapshot-id" : 6603964444137547572,
+    "timestamp-ms" : 1662457721334,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "3",
+      "total-files-size" : "2181",
+      "total-data-files" : "3",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "snapshot-id" : 5589333083859779254
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00003-7391ea34-55d6-4474-9e43-279e0f5e94d2.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457702185,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00004-bdcf60af-58a5-45ee-8301-1192e27e955a.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v7.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v7.metadata.json
new file mode 100644
index 000000000..ceb351e90
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v7.metadata.json
@@ -0,0 +1,156 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662457725652,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662457725",
+    "write.format.default" : "orc",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 5589333083859779254,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 5589333083859779254,
+    "parent-snapshot-id" : 6603964444137547572,
+    "timestamp-ms" : 1662457721334,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "3",
+      "total-files-size" : "2181",
+      "total-data-files" : "3",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "snapshot-id" : 5589333083859779254
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00003-7391ea34-55d6-4474-9e43-279e0f5e94d2.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457702185,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00004-bdcf60af-58a5-45ee-8301-1192e27e955a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00005-286bb9d4-6ec5-4e12-8a27-ac7588da4b86.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v8.metadata.json b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v8.metadata.json
new file mode 100644
index 000000000..098682a91
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/v8.metadata.json
@@ -0,0 +1,181 @@
+{
+  "format-version" : 1,
+  "table-uuid" : "2d1d7fdc-e16f-431a-aaf1-ccc37bee65cc",
+  "location" : "/test-warehouse/iceberg_mixed_file_format_test",
+  "last-updated-ms" : 1662457745425,
+  "last-column-id" : 3,
+  "schema" : {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  },
+  "current-schema-id" : 0,
+  "schemas" : [ {
+    "type" : "struct",
+    "schema-id" : 0,
+    "fields" : [ {
+      "id" : 1,
+      "name" : "i",
+      "required" : false,
+      "type" : "int"
+    }, {
+      "id" : 2,
+      "name" : "s",
+      "required" : false,
+      "type" : "string"
+    }, {
+      "id" : 3,
+      "name" : "d",
+      "required" : false,
+      "type" : "double"
+    } ]
+  } ],
+  "partition-spec" : [ ],
+  "default-spec-id" : 0,
+  "partition-specs" : [ {
+    "spec-id" : 0,
+    "fields" : [ ]
+  } ],
+  "last-partition-id" : 999,
+  "default-sort-order-id" : 0,
+  "sort-orders" : [ {
+    "order-id" : 0,
+    "fields" : [ ]
+  } ],
+  "properties" : {
+    "engine.hive.enabled" : "true",
+    "last_modified_time" : "1662457725",
+    "write.format.default" : "orc",
+    "bucketing_version" : "2",
+    "last_modified_by" : "gfurnstahl",
+    "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",
+    "serialization.format" : "1"
+  },
+  "current-snapshot-id" : 5574591442446832859,
+  "snapshots" : [ {
+    "snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456653023,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "1",
+      "total-files-size" : "872",
+      "total-data-files" : "1",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-1847986799759674135-1-a0332d05-4c9c-4640-8656-69ba61b7859a.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 6603964444137547572,
+    "parent-snapshot-id" : 1847986799759674135,
+    "timestamp-ms" : 1662456781190,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "437",
+      "changed-partition-count" : "1",
+      "total-records" : "2",
+      "total-files-size" : "1309",
+      "total-data-files" : "2",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-6603964444137547572-1-871d1473-8566-46c0-a530-a2256b3f396f.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 5589333083859779254,
+    "parent-snapshot-id" : 6603964444137547572,
+    "timestamp-ms" : 1662457721334,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "872",
+      "changed-partition-count" : "1",
+      "total-records" : "3",
+      "total-files-size" : "2181",
+      "total-data-files" : "3",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-5589333083859779254-1-d43cc1ea-096f-4594-9583-b1b27f8f0230.avro",
+    "schema-id" : 0
+  }, {
+    "snapshot-id" : 5574591442446832859,
+    "parent-snapshot-id" : 5589333083859779254,
+    "timestamp-ms" : 1662457745425,
+    "summary" : {
+      "operation" : "append",
+      "added-data-files" : "1",
+      "added-records" : "1",
+      "added-files-size" : "435",
+      "changed-partition-count" : "1",
+      "total-records" : "4",
+      "total-files-size" : "2616",
+      "total-data-files" : "4",
+      "total-delete-files" : "0",
+      "total-position-deletes" : "0",
+      "total-equality-deletes" : "0"
+    },
+    "manifest-list" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/snap-5574591442446832859-1-055baf62-de6d-4583-bf21-f187f9482343.avro",
+    "schema-id" : 0
+  } ],
+  "snapshot-log" : [ {
+    "timestamp-ms" : 1662456653023,
+    "snapshot-id" : 1847986799759674135
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "snapshot-id" : 6603964444137547572
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "snapshot-id" : 5589333083859779254
+  }, {
+    "timestamp-ms" : 1662457745425,
+    "snapshot-id" : 5574591442446832859
+  } ],
+  "metadata-log" : [ {
+    "timestamp-ms" : 1662456462742,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00000-f7855248-24e0-4c04-bdee-faceb199b0eb.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456653023,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00001-ecdbd9a2-7ca5-41a3-b4dc-50036053a095.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456709188,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00002-530b0b54-627c-490b-91f0-f0aea992561a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662456781190,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00003-7391ea34-55d6-4474-9e43-279e0f5e94d2.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457702185,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00004-bdcf60af-58a5-45ee-8301-1192e27e955a.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457721334,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00005-286bb9d4-6ec5-4e12-8a27-ac7588da4b86.metadata.json"
+  }, {
+    "timestamp-ms" : 1662457725652,
+    "metadata-file" : "/test-warehouse/iceberg_mixed_file_format_test/metadata/00006-29be9914-209f-4c92-8b91-d87029bd1ed4.metadata.json"
+  } ]
+}
\ No newline at end of file
diff --git a/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/version-hint.txt b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/version-hint.txt
new file mode 100644
index 000000000..45a4fb75d
--- /dev/null
+++ b/testdata/data/iceberg_test/iceberg_mixed_file_format_test/metadata/version-hint.txt
@@ -0,0 +1 @@
+8
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test
new file mode 100644
index 000000000..03b19b031
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-mixed-file-format.test
@@ -0,0 +1,11 @@
+====
+---- QUERY
+select * from iceberg_mixed_file_format_test;
+---- RESULTS
+2,'B',1.5
+3,'C',2.5
+4,'D',3.5
+1,'A',0.5
+---- TYPES
+INT, STRING, DOUBLE
+====
\ No newline at end of file
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 4dc20f94e..b8ab0cef1 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -784,6 +784,11 @@ class TestIcebergTable(IcebergTestSuite):
     self.run_test_case('QueryTest/iceberg-multiple-storage-locations-table',
                        vector, unique_database)
 
+  def test_mixed_file_format(self, vector, unique_database):
+    create_iceberg_table_from_directory(self.client, unique_database,
+                                        "iceberg_mixed_file_format_test", "parquet")
+    self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
+                      unique_database)
 
 class TestIcebergV2Table(IcebergTestSuite):
   """Tests related to Iceberg V2 tables."""